• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2011 Christoph Bumiller
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include "tgsi/tgsi_build.h"
24 #include "tgsi/tgsi_dump.h"
25 #include "tgsi/tgsi_scan.h"
26 #include "tgsi/tgsi_util.h"
27 
28 #include <set>
29 
30 #include "codegen/nv50_ir.h"
31 #include "codegen/nv50_ir_util.h"
32 #include "codegen/nv50_ir_build_util.h"
33 
34 namespace tgsi {
35 
36 class Source;
37 
38 static nv50_ir::operation translateOpcode(uint opcode);
39 static nv50_ir::DataFile translateFile(uint file);
40 static nv50_ir::TexTarget translateTexture(uint texTarg);
41 static nv50_ir::SVSemantic translateSysVal(uint sysval);
42 static nv50_ir::CacheMode translateCacheMode(uint qualifier);
43 static nv50_ir::ImgFormat translateImgFormat(uint format);
44 
45 class Instruction
46 {
47 public:
Instruction(const struct tgsi_full_instruction * inst)48    Instruction(const struct tgsi_full_instruction *inst) : insn(inst) { }
49 
50    class SrcRegister
51    {
52    public:
SrcRegister(const struct tgsi_full_src_register * src)53       SrcRegister(const struct tgsi_full_src_register *src)
54          : reg(src->Register),
55            fsr(src)
56       { }
57 
SrcRegister(const struct tgsi_src_register & src)58       SrcRegister(const struct tgsi_src_register& src) : reg(src), fsr(NULL) { }
59 
SrcRegister(const struct tgsi_ind_register & ind)60       SrcRegister(const struct tgsi_ind_register& ind)
61          : reg(tgsi_util_get_src_from_ind(&ind)),
62            fsr(NULL)
63       { }
64 
offsetToSrc(struct tgsi_texture_offset off)65       struct tgsi_src_register offsetToSrc(struct tgsi_texture_offset off)
66       {
67          struct tgsi_src_register reg;
68          memset(&reg, 0, sizeof(reg));
69          reg.Index = off.Index;
70          reg.File = off.File;
71          reg.SwizzleX = off.SwizzleX;
72          reg.SwizzleY = off.SwizzleY;
73          reg.SwizzleZ = off.SwizzleZ;
74          return reg;
75       }
76 
SrcRegister(const struct tgsi_texture_offset & off)77       SrcRegister(const struct tgsi_texture_offset& off) :
78          reg(offsetToSrc(off)),
79          fsr(NULL)
80       { }
81 
getFile() const82       uint getFile() const { return reg.File; }
83 
is2D() const84       bool is2D() const { return reg.Dimension; }
85 
isIndirect(int dim) const86       bool isIndirect(int dim) const
87       {
88          return (dim && fsr) ? fsr->Dimension.Indirect : reg.Indirect;
89       }
90 
getIndex(int dim) const91       int getIndex(int dim) const
92       {
93          return (dim && fsr) ? fsr->Dimension.Index : reg.Index;
94       }
95 
getSwizzle(int chan) const96       int getSwizzle(int chan) const
97       {
98          return tgsi_util_get_src_register_swizzle(&reg, chan);
99       }
100 
getArrayId() const101       int getArrayId() const
102       {
103          if (isIndirect(0))
104             return fsr->Indirect.ArrayID;
105          return 0;
106       }
107 
108       nv50_ir::Modifier getMod(int chan) const;
109 
getIndirect(int dim) const110       SrcRegister getIndirect(int dim) const
111       {
112          assert(fsr && isIndirect(dim));
113          if (dim)
114             return SrcRegister(fsr->DimIndirect);
115          return SrcRegister(fsr->Indirect);
116       }
117 
getValueU32(int c,const struct nv50_ir_prog_info * info) const118       uint32_t getValueU32(int c, const struct nv50_ir_prog_info *info) const
119       {
120          assert(reg.File == TGSI_FILE_IMMEDIATE);
121          assert(!reg.Absolute);
122          assert(!reg.Negate);
123          return info->immd.data[reg.Index * 4 + getSwizzle(c)];
124       }
125 
126    private:
127       const struct tgsi_src_register reg;
128       const struct tgsi_full_src_register *fsr;
129    };
130 
131    class DstRegister
132    {
133    public:
DstRegister(const struct tgsi_full_dst_register * dst)134       DstRegister(const struct tgsi_full_dst_register *dst)
135          : reg(dst->Register),
136            fdr(dst)
137       { }
138 
DstRegister(const struct tgsi_dst_register & dst)139       DstRegister(const struct tgsi_dst_register& dst) : reg(dst), fdr(NULL) { }
140 
getFile() const141       uint getFile() const { return reg.File; }
142 
is2D() const143       bool is2D() const { return reg.Dimension; }
144 
isIndirect(int dim) const145       bool isIndirect(int dim) const
146       {
147          return (dim && fdr) ? fdr->Dimension.Indirect : reg.Indirect;
148       }
149 
getIndex(int dim) const150       int getIndex(int dim) const
151       {
152          return (dim && fdr) ? fdr->Dimension.Dimension : reg.Index;
153       }
154 
getMask() const155       unsigned int getMask() const { return reg.WriteMask; }
156 
isMasked(int chan) const157       bool isMasked(int chan) const { return !(getMask() & (1 << chan)); }
158 
getIndirect(int dim) const159       SrcRegister getIndirect(int dim) const
160       {
161          assert(fdr && isIndirect(dim));
162          if (dim)
163             return SrcRegister(fdr->DimIndirect);
164          return SrcRegister(fdr->Indirect);
165       }
166 
asSrc()167       struct tgsi_full_src_register asSrc()
168       {
169          assert(fdr);
170          return tgsi_full_src_register_from_dst(fdr);
171       }
172 
getArrayId() const173       int getArrayId() const
174       {
175          if (isIndirect(0))
176             return fdr->Indirect.ArrayID;
177          return 0;
178       }
179 
180    private:
181       const struct tgsi_dst_register reg;
182       const struct tgsi_full_dst_register *fdr;
183    };
184 
getOpcode() const185    inline uint getOpcode() const { return insn->Instruction.Opcode; }
186 
srcCount() const187    unsigned int srcCount() const { return insn->Instruction.NumSrcRegs; }
dstCount() const188    unsigned int dstCount() const { return insn->Instruction.NumDstRegs; }
189 
190    // mask of used components of source s
191    unsigned int srcMask(unsigned int s) const;
192    unsigned int texOffsetMask() const;
193 
getSrc(unsigned int s) const194    SrcRegister getSrc(unsigned int s) const
195    {
196       assert(s < srcCount());
197       return SrcRegister(&insn->Src[s]);
198    }
199 
getDst(unsigned int d) const200    DstRegister getDst(unsigned int d) const
201    {
202       assert(d < dstCount());
203       return DstRegister(&insn->Dst[d]);
204    }
205 
getTexOffset(unsigned int i) const206    SrcRegister getTexOffset(unsigned int i) const
207    {
208       assert(i < TGSI_FULL_MAX_TEX_OFFSETS);
209       return SrcRegister(insn->TexOffsets[i]);
210    }
211 
getNumTexOffsets() const212    unsigned int getNumTexOffsets() const { return insn->Texture.NumOffsets; }
213 
214    bool checkDstSrcAliasing() const;
215 
getOP() const216    inline nv50_ir::operation getOP() const {
217       return translateOpcode(getOpcode()); }
218 
219    nv50_ir::DataType inferSrcType() const;
220    nv50_ir::DataType inferDstType() const;
221 
222    nv50_ir::CondCode getSetCond() const;
223 
224    nv50_ir::TexInstruction::Target getTexture(const Source *, int s) const;
225 
getImageFormat() const226    const nv50_ir::TexInstruction::ImgFormatDesc *getImageFormat() const {
227       return &nv50_ir::TexInstruction::formatTable[
228             translateImgFormat(insn->Memory.Format)];
229    }
230 
getImageTarget() const231    nv50_ir::TexTarget getImageTarget() const {
232       return translateTexture(insn->Memory.Texture);
233    }
234 
getCacheMode() const235    nv50_ir::CacheMode getCacheMode() const {
236       if (!insn->Instruction.Memory)
237          return nv50_ir::CACHE_CA;
238       return translateCacheMode(insn->Memory.Qualifier);
239    }
240 
getLabel()241    inline uint getLabel() { return insn->Label.Label; }
242 
getSaturate() const243    unsigned getSaturate() const { return insn->Instruction.Saturate; }
244 
print() const245    void print() const
246    {
247       tgsi_dump_instruction(insn, 1);
248    }
249 
250 private:
251    const struct tgsi_full_instruction *insn;
252 };
253 
texOffsetMask() const254 unsigned int Instruction::texOffsetMask() const
255 {
256    const struct tgsi_instruction_texture *tex = &insn->Texture;
257    assert(insn->Instruction.Texture);
258 
259    switch (tex->Texture) {
260    case TGSI_TEXTURE_BUFFER:
261    case TGSI_TEXTURE_1D:
262    case TGSI_TEXTURE_SHADOW1D:
263    case TGSI_TEXTURE_1D_ARRAY:
264    case TGSI_TEXTURE_SHADOW1D_ARRAY:
265       return 0x1;
266    case TGSI_TEXTURE_2D:
267    case TGSI_TEXTURE_SHADOW2D:
268    case TGSI_TEXTURE_2D_ARRAY:
269    case TGSI_TEXTURE_SHADOW2D_ARRAY:
270    case TGSI_TEXTURE_RECT:
271    case TGSI_TEXTURE_SHADOWRECT:
272    case TGSI_TEXTURE_2D_MSAA:
273    case TGSI_TEXTURE_2D_ARRAY_MSAA:
274       return 0x3;
275    case TGSI_TEXTURE_3D:
276       return 0x7;
277    default:
278       assert(!"Unexpected texture target");
279       return 0xf;
280    }
281 }
282 
srcMask(unsigned int s) const283 unsigned int Instruction::srcMask(unsigned int s) const
284 {
285    unsigned int mask = insn->Dst[0].Register.WriteMask;
286 
287    switch (insn->Instruction.Opcode) {
288    case TGSI_OPCODE_COS:
289    case TGSI_OPCODE_SIN:
290       return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0);
291    case TGSI_OPCODE_DP2:
292       return 0x3;
293    case TGSI_OPCODE_DP3:
294       return 0x7;
295    case TGSI_OPCODE_DP4:
296    case TGSI_OPCODE_KILL_IF: /* WriteMask ignored */
297       return 0xf;
298    case TGSI_OPCODE_DST:
299       return mask & (s ? 0xa : 0x6);
300    case TGSI_OPCODE_EX2:
301    case TGSI_OPCODE_EXP:
302    case TGSI_OPCODE_LG2:
303    case TGSI_OPCODE_LOG:
304    case TGSI_OPCODE_POW:
305    case TGSI_OPCODE_RCP:
306    case TGSI_OPCODE_RSQ:
307       return 0x1;
308    case TGSI_OPCODE_IF:
309    case TGSI_OPCODE_UIF:
310       return 0x1;
311    case TGSI_OPCODE_LIT:
312       return 0xb;
313    case TGSI_OPCODE_TEX2:
314    case TGSI_OPCODE_TXB2:
315    case TGSI_OPCODE_TXL2:
316       return (s == 0) ? 0xf : 0x3;
317    case TGSI_OPCODE_TEX:
318    case TGSI_OPCODE_TXB:
319    case TGSI_OPCODE_TXD:
320    case TGSI_OPCODE_TXL:
321    case TGSI_OPCODE_TXP:
322    case TGSI_OPCODE_TXF:
323    case TGSI_OPCODE_TG4:
324    case TGSI_OPCODE_TEX_LZ:
325    case TGSI_OPCODE_TXF_LZ:
326    case TGSI_OPCODE_LODQ:
327    {
328       const struct tgsi_instruction_texture *tex = &insn->Texture;
329 
330       assert(insn->Instruction.Texture);
331 
332       mask = 0x7;
333       if (insn->Instruction.Opcode != TGSI_OPCODE_TEX &&
334           insn->Instruction.Opcode != TGSI_OPCODE_TEX_LZ &&
335           insn->Instruction.Opcode != TGSI_OPCODE_TXF_LZ &&
336           insn->Instruction.Opcode != TGSI_OPCODE_TXD)
337          mask |= 0x8; /* bias, lod or proj */
338 
339       switch (tex->Texture) {
340       case TGSI_TEXTURE_1D:
341          mask &= 0x9;
342          break;
343       case TGSI_TEXTURE_SHADOW1D:
344          mask &= 0xd;
345          break;
346       case TGSI_TEXTURE_1D_ARRAY:
347       case TGSI_TEXTURE_2D:
348       case TGSI_TEXTURE_RECT:
349          mask &= 0xb;
350          break;
351       case TGSI_TEXTURE_CUBE_ARRAY:
352       case TGSI_TEXTURE_SHADOW2D_ARRAY:
353       case TGSI_TEXTURE_SHADOWCUBE:
354       case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
355          mask |= 0x8;
356          break;
357       default:
358          break;
359       }
360    }
361       return mask;
362    case TGSI_OPCODE_TXQ:
363       return 1;
364    case TGSI_OPCODE_D2I:
365    case TGSI_OPCODE_D2U:
366    case TGSI_OPCODE_D2F:
367    case TGSI_OPCODE_DSLT:
368    case TGSI_OPCODE_DSGE:
369    case TGSI_OPCODE_DSEQ:
370    case TGSI_OPCODE_DSNE:
371    case TGSI_OPCODE_U64SEQ:
372    case TGSI_OPCODE_U64SNE:
373    case TGSI_OPCODE_I64SLT:
374    case TGSI_OPCODE_U64SLT:
375    case TGSI_OPCODE_I64SGE:
376    case TGSI_OPCODE_U64SGE:
377    case TGSI_OPCODE_I642F:
378    case TGSI_OPCODE_U642F:
379       switch (util_bitcount(mask)) {
380       case 1: return 0x3;
381       case 2: return 0xf;
382       default:
383          assert(!"unexpected mask");
384          return 0xf;
385       }
386    case TGSI_OPCODE_I2D:
387    case TGSI_OPCODE_U2D:
388    case TGSI_OPCODE_F2D: {
389       unsigned int x = 0;
390       if ((mask & 0x3) == 0x3)
391          x |= 1;
392       if ((mask & 0xc) == 0xc)
393          x |= 2;
394       return x;
395    }
396    case TGSI_OPCODE_PK2H:
397       return 0x3;
398    case TGSI_OPCODE_UP2H:
399       return 0x1;
400    default:
401       break;
402    }
403 
404    return mask;
405 }
406 
getMod(int chan) const407 nv50_ir::Modifier Instruction::SrcRegister::getMod(int chan) const
408 {
409    nv50_ir::Modifier m(0);
410 
411    if (reg.Absolute)
412       m = m | nv50_ir::Modifier(NV50_IR_MOD_ABS);
413    if (reg.Negate)
414       m = m | nv50_ir::Modifier(NV50_IR_MOD_NEG);
415    return m;
416 }
417 
translateFile(uint file)418 static nv50_ir::DataFile translateFile(uint file)
419 {
420    switch (file) {
421    case TGSI_FILE_CONSTANT:        return nv50_ir::FILE_MEMORY_CONST;
422    case TGSI_FILE_INPUT:           return nv50_ir::FILE_SHADER_INPUT;
423    case TGSI_FILE_OUTPUT:          return nv50_ir::FILE_SHADER_OUTPUT;
424    case TGSI_FILE_TEMPORARY:       return nv50_ir::FILE_GPR;
425    case TGSI_FILE_ADDRESS:         return nv50_ir::FILE_ADDRESS;
426    case TGSI_FILE_IMMEDIATE:       return nv50_ir::FILE_IMMEDIATE;
427    case TGSI_FILE_SYSTEM_VALUE:    return nv50_ir::FILE_SYSTEM_VALUE;
428    case TGSI_FILE_BUFFER:          return nv50_ir::FILE_MEMORY_BUFFER;
429    case TGSI_FILE_IMAGE:           return nv50_ir::FILE_MEMORY_GLOBAL;
430    case TGSI_FILE_MEMORY:          return nv50_ir::FILE_MEMORY_GLOBAL;
431    case TGSI_FILE_SAMPLER:
432    case TGSI_FILE_NULL:
433    default:
434       return nv50_ir::FILE_NULL;
435    }
436 }
437 
translateSysVal(uint sysval)438 static nv50_ir::SVSemantic translateSysVal(uint sysval)
439 {
440    switch (sysval) {
441    case TGSI_SEMANTIC_FACE:       return nv50_ir::SV_FACE;
442    case TGSI_SEMANTIC_PSIZE:      return nv50_ir::SV_POINT_SIZE;
443    case TGSI_SEMANTIC_PRIMID:     return nv50_ir::SV_PRIMITIVE_ID;
444    case TGSI_SEMANTIC_INSTANCEID: return nv50_ir::SV_INSTANCE_ID;
445    case TGSI_SEMANTIC_VERTEXID:   return nv50_ir::SV_VERTEX_ID;
446    case TGSI_SEMANTIC_GRID_SIZE:  return nv50_ir::SV_NCTAID;
447    case TGSI_SEMANTIC_BLOCK_ID:   return nv50_ir::SV_CTAID;
448    case TGSI_SEMANTIC_BLOCK_SIZE: return nv50_ir::SV_NTID;
449    case TGSI_SEMANTIC_THREAD_ID:  return nv50_ir::SV_TID;
450    case TGSI_SEMANTIC_SAMPLEID:   return nv50_ir::SV_SAMPLE_INDEX;
451    case TGSI_SEMANTIC_SAMPLEPOS:  return nv50_ir::SV_SAMPLE_POS;
452    case TGSI_SEMANTIC_SAMPLEMASK: return nv50_ir::SV_SAMPLE_MASK;
453    case TGSI_SEMANTIC_INVOCATIONID: return nv50_ir::SV_INVOCATION_ID;
454    case TGSI_SEMANTIC_TESSCOORD:  return nv50_ir::SV_TESS_COORD;
455    case TGSI_SEMANTIC_TESSOUTER:  return nv50_ir::SV_TESS_OUTER;
456    case TGSI_SEMANTIC_TESSINNER:  return nv50_ir::SV_TESS_INNER;
457    case TGSI_SEMANTIC_VERTICESIN: return nv50_ir::SV_VERTEX_COUNT;
458    case TGSI_SEMANTIC_HELPER_INVOCATION: return nv50_ir::SV_THREAD_KILL;
459    case TGSI_SEMANTIC_BASEVERTEX: return nv50_ir::SV_BASEVERTEX;
460    case TGSI_SEMANTIC_BASEINSTANCE: return nv50_ir::SV_BASEINSTANCE;
461    case TGSI_SEMANTIC_DRAWID:     return nv50_ir::SV_DRAWID;
462    case TGSI_SEMANTIC_WORK_DIM:   return nv50_ir::SV_WORK_DIM;
463    case TGSI_SEMANTIC_SUBGROUP_INVOCATION: return nv50_ir::SV_LANEID;
464    case TGSI_SEMANTIC_SUBGROUP_EQ_MASK: return nv50_ir::SV_LANEMASK_EQ;
465    case TGSI_SEMANTIC_SUBGROUP_LT_MASK: return nv50_ir::SV_LANEMASK_LT;
466    case TGSI_SEMANTIC_SUBGROUP_LE_MASK: return nv50_ir::SV_LANEMASK_LE;
467    case TGSI_SEMANTIC_SUBGROUP_GT_MASK: return nv50_ir::SV_LANEMASK_GT;
468    case TGSI_SEMANTIC_SUBGROUP_GE_MASK: return nv50_ir::SV_LANEMASK_GE;
469    default:
470       assert(0);
471       return nv50_ir::SV_CLOCK;
472    }
473 }
474 
475 #define NV50_IR_TEX_TARG_CASE(a, b) \
476    case TGSI_TEXTURE_##a: return nv50_ir::TEX_TARGET_##b;
477 
translateTexture(uint tex)478 static nv50_ir::TexTarget translateTexture(uint tex)
479 {
480    switch (tex) {
481    NV50_IR_TEX_TARG_CASE(1D, 1D);
482    NV50_IR_TEX_TARG_CASE(2D, 2D);
483    NV50_IR_TEX_TARG_CASE(2D_MSAA, 2D_MS);
484    NV50_IR_TEX_TARG_CASE(3D, 3D);
485    NV50_IR_TEX_TARG_CASE(CUBE, CUBE);
486    NV50_IR_TEX_TARG_CASE(RECT, RECT);
487    NV50_IR_TEX_TARG_CASE(1D_ARRAY, 1D_ARRAY);
488    NV50_IR_TEX_TARG_CASE(2D_ARRAY, 2D_ARRAY);
489    NV50_IR_TEX_TARG_CASE(2D_ARRAY_MSAA, 2D_MS_ARRAY);
490    NV50_IR_TEX_TARG_CASE(CUBE_ARRAY, CUBE_ARRAY);
491    NV50_IR_TEX_TARG_CASE(SHADOW1D, 1D_SHADOW);
492    NV50_IR_TEX_TARG_CASE(SHADOW2D, 2D_SHADOW);
493    NV50_IR_TEX_TARG_CASE(SHADOWCUBE, CUBE_SHADOW);
494    NV50_IR_TEX_TARG_CASE(SHADOWRECT, RECT_SHADOW);
495    NV50_IR_TEX_TARG_CASE(SHADOW1D_ARRAY, 1D_ARRAY_SHADOW);
496    NV50_IR_TEX_TARG_CASE(SHADOW2D_ARRAY, 2D_ARRAY_SHADOW);
497    NV50_IR_TEX_TARG_CASE(SHADOWCUBE_ARRAY, CUBE_ARRAY_SHADOW);
498    NV50_IR_TEX_TARG_CASE(BUFFER, BUFFER);
499 
500    case TGSI_TEXTURE_UNKNOWN:
501    default:
502       assert(!"invalid texture target");
503       return nv50_ir::TEX_TARGET_2D;
504    }
505 }
506 
translateCacheMode(uint qualifier)507 static nv50_ir::CacheMode translateCacheMode(uint qualifier)
508 {
509    if (qualifier & TGSI_MEMORY_VOLATILE)
510       return nv50_ir::CACHE_CV;
511    if (qualifier & TGSI_MEMORY_COHERENT)
512       return nv50_ir::CACHE_CG;
513    return nv50_ir::CACHE_CA;
514 }
515 
translateImgFormat(uint format)516 static nv50_ir::ImgFormat translateImgFormat(uint format)
517 {
518 
519 #define FMT_CASE(a, b) \
520   case PIPE_FORMAT_ ## a: return nv50_ir::FMT_ ## b
521 
522    switch (format) {
523    FMT_CASE(NONE, NONE);
524 
525    FMT_CASE(R32G32B32A32_FLOAT, RGBA32F);
526    FMT_CASE(R16G16B16A16_FLOAT, RGBA16F);
527    FMT_CASE(R32G32_FLOAT, RG32F);
528    FMT_CASE(R16G16_FLOAT, RG16F);
529    FMT_CASE(R11G11B10_FLOAT, R11G11B10F);
530    FMT_CASE(R32_FLOAT, R32F);
531    FMT_CASE(R16_FLOAT, R16F);
532 
533    FMT_CASE(R32G32B32A32_UINT, RGBA32UI);
534    FMT_CASE(R16G16B16A16_UINT, RGBA16UI);
535    FMT_CASE(R10G10B10A2_UINT, RGB10A2UI);
536    FMT_CASE(R8G8B8A8_UINT, RGBA8UI);
537    FMT_CASE(R32G32_UINT, RG32UI);
538    FMT_CASE(R16G16_UINT, RG16UI);
539    FMT_CASE(R8G8_UINT, RG8UI);
540    FMT_CASE(R32_UINT, R32UI);
541    FMT_CASE(R16_UINT, R16UI);
542    FMT_CASE(R8_UINT, R8UI);
543 
544    FMT_CASE(R32G32B32A32_SINT, RGBA32I);
545    FMT_CASE(R16G16B16A16_SINT, RGBA16I);
546    FMT_CASE(R8G8B8A8_SINT, RGBA8I);
547    FMT_CASE(R32G32_SINT, RG32I);
548    FMT_CASE(R16G16_SINT, RG16I);
549    FMT_CASE(R8G8_SINT, RG8I);
550    FMT_CASE(R32_SINT, R32I);
551    FMT_CASE(R16_SINT, R16I);
552    FMT_CASE(R8_SINT, R8I);
553 
554    FMT_CASE(R16G16B16A16_UNORM, RGBA16);
555    FMT_CASE(R10G10B10A2_UNORM, RGB10A2);
556    FMT_CASE(R8G8B8A8_UNORM, RGBA8);
557    FMT_CASE(R16G16_UNORM, RG16);
558    FMT_CASE(R8G8_UNORM, RG8);
559    FMT_CASE(R16_UNORM, R16);
560    FMT_CASE(R8_UNORM, R8);
561 
562    FMT_CASE(R16G16B16A16_SNORM, RGBA16_SNORM);
563    FMT_CASE(R8G8B8A8_SNORM, RGBA8_SNORM);
564    FMT_CASE(R16G16_SNORM, RG16_SNORM);
565    FMT_CASE(R8G8_SNORM, RG8_SNORM);
566    FMT_CASE(R16_SNORM, R16_SNORM);
567    FMT_CASE(R8_SNORM, R8_SNORM);
568 
569    FMT_CASE(B8G8R8A8_UNORM, BGRA8);
570    }
571 
572    assert(!"Unexpected format");
573    return nv50_ir::FMT_NONE;
574 }
575 
inferSrcType() const576 nv50_ir::DataType Instruction::inferSrcType() const
577 {
578    switch (getOpcode()) {
579    case TGSI_OPCODE_UIF:
580    case TGSI_OPCODE_AND:
581    case TGSI_OPCODE_OR:
582    case TGSI_OPCODE_XOR:
583    case TGSI_OPCODE_NOT:
584    case TGSI_OPCODE_SHL:
585    case TGSI_OPCODE_U2F:
586    case TGSI_OPCODE_U2D:
587    case TGSI_OPCODE_U2I64:
588    case TGSI_OPCODE_UADD:
589    case TGSI_OPCODE_UDIV:
590    case TGSI_OPCODE_UMOD:
591    case TGSI_OPCODE_UMAD:
592    case TGSI_OPCODE_UMUL:
593    case TGSI_OPCODE_UMUL_HI:
594    case TGSI_OPCODE_UMAX:
595    case TGSI_OPCODE_UMIN:
596    case TGSI_OPCODE_USEQ:
597    case TGSI_OPCODE_USGE:
598    case TGSI_OPCODE_USLT:
599    case TGSI_OPCODE_USNE:
600    case TGSI_OPCODE_USHR:
601    case TGSI_OPCODE_ATOMUADD:
602    case TGSI_OPCODE_ATOMXCHG:
603    case TGSI_OPCODE_ATOMCAS:
604    case TGSI_OPCODE_ATOMAND:
605    case TGSI_OPCODE_ATOMOR:
606    case TGSI_OPCODE_ATOMXOR:
607    case TGSI_OPCODE_ATOMUMIN:
608    case TGSI_OPCODE_ATOMUMAX:
609    case TGSI_OPCODE_UBFE:
610    case TGSI_OPCODE_UMSB:
611    case TGSI_OPCODE_UP2H:
612    case TGSI_OPCODE_VOTE_ALL:
613    case TGSI_OPCODE_VOTE_ANY:
614    case TGSI_OPCODE_VOTE_EQ:
615       return nv50_ir::TYPE_U32;
616    case TGSI_OPCODE_I2F:
617    case TGSI_OPCODE_I2D:
618    case TGSI_OPCODE_I2I64:
619    case TGSI_OPCODE_IDIV:
620    case TGSI_OPCODE_IMUL_HI:
621    case TGSI_OPCODE_IMAX:
622    case TGSI_OPCODE_IMIN:
623    case TGSI_OPCODE_IABS:
624    case TGSI_OPCODE_INEG:
625    case TGSI_OPCODE_ISGE:
626    case TGSI_OPCODE_ISHR:
627    case TGSI_OPCODE_ISLT:
628    case TGSI_OPCODE_ISSG:
629    case TGSI_OPCODE_MOD:
630    case TGSI_OPCODE_UARL:
631    case TGSI_OPCODE_ATOMIMIN:
632    case TGSI_OPCODE_ATOMIMAX:
633    case TGSI_OPCODE_IBFE:
634    case TGSI_OPCODE_IMSB:
635       return nv50_ir::TYPE_S32;
636    case TGSI_OPCODE_D2F:
637    case TGSI_OPCODE_D2I:
638    case TGSI_OPCODE_D2U:
639    case TGSI_OPCODE_D2I64:
640    case TGSI_OPCODE_D2U64:
641    case TGSI_OPCODE_DABS:
642    case TGSI_OPCODE_DNEG:
643    case TGSI_OPCODE_DADD:
644    case TGSI_OPCODE_DMUL:
645    case TGSI_OPCODE_DDIV:
646    case TGSI_OPCODE_DMAX:
647    case TGSI_OPCODE_DMIN:
648    case TGSI_OPCODE_DSLT:
649    case TGSI_OPCODE_DSGE:
650    case TGSI_OPCODE_DSEQ:
651    case TGSI_OPCODE_DSNE:
652    case TGSI_OPCODE_DRCP:
653    case TGSI_OPCODE_DSQRT:
654    case TGSI_OPCODE_DMAD:
655    case TGSI_OPCODE_DFMA:
656    case TGSI_OPCODE_DFRAC:
657    case TGSI_OPCODE_DRSQ:
658    case TGSI_OPCODE_DTRUNC:
659    case TGSI_OPCODE_DCEIL:
660    case TGSI_OPCODE_DFLR:
661    case TGSI_OPCODE_DROUND:
662       return nv50_ir::TYPE_F64;
663    case TGSI_OPCODE_U64SEQ:
664    case TGSI_OPCODE_U64SNE:
665    case TGSI_OPCODE_U64SLT:
666    case TGSI_OPCODE_U64SGE:
667    case TGSI_OPCODE_U64MIN:
668    case TGSI_OPCODE_U64MAX:
669    case TGSI_OPCODE_U64ADD:
670    case TGSI_OPCODE_U64MUL:
671    case TGSI_OPCODE_U64SHL:
672    case TGSI_OPCODE_U64SHR:
673    case TGSI_OPCODE_U64DIV:
674    case TGSI_OPCODE_U64MOD:
675    case TGSI_OPCODE_U642F:
676    case TGSI_OPCODE_U642D:
677       return nv50_ir::TYPE_U64;
678    case TGSI_OPCODE_I64ABS:
679    case TGSI_OPCODE_I64SSG:
680    case TGSI_OPCODE_I64NEG:
681    case TGSI_OPCODE_I64SLT:
682    case TGSI_OPCODE_I64SGE:
683    case TGSI_OPCODE_I64MIN:
684    case TGSI_OPCODE_I64MAX:
685    case TGSI_OPCODE_I64SHR:
686    case TGSI_OPCODE_I64DIV:
687    case TGSI_OPCODE_I64MOD:
688    case TGSI_OPCODE_I642F:
689    case TGSI_OPCODE_I642D:
690       return nv50_ir::TYPE_S64;
691    default:
692       return nv50_ir::TYPE_F32;
693    }
694 }
695 
inferDstType() const696 nv50_ir::DataType Instruction::inferDstType() const
697 {
698    switch (getOpcode()) {
699    case TGSI_OPCODE_D2U:
700    case TGSI_OPCODE_F2U: return nv50_ir::TYPE_U32;
701    case TGSI_OPCODE_D2I:
702    case TGSI_OPCODE_F2I: return nv50_ir::TYPE_S32;
703    case TGSI_OPCODE_FSEQ:
704    case TGSI_OPCODE_FSGE:
705    case TGSI_OPCODE_FSLT:
706    case TGSI_OPCODE_FSNE:
707    case TGSI_OPCODE_DSEQ:
708    case TGSI_OPCODE_DSGE:
709    case TGSI_OPCODE_DSLT:
710    case TGSI_OPCODE_DSNE:
711    case TGSI_OPCODE_I64SLT:
712    case TGSI_OPCODE_I64SGE:
713    case TGSI_OPCODE_U64SEQ:
714    case TGSI_OPCODE_U64SNE:
715    case TGSI_OPCODE_U64SLT:
716    case TGSI_OPCODE_U64SGE:
717    case TGSI_OPCODE_PK2H:
718       return nv50_ir::TYPE_U32;
719    case TGSI_OPCODE_I2F:
720    case TGSI_OPCODE_U2F:
721    case TGSI_OPCODE_D2F:
722    case TGSI_OPCODE_I642F:
723    case TGSI_OPCODE_U642F:
724    case TGSI_OPCODE_UP2H:
725       return nv50_ir::TYPE_F32;
726    case TGSI_OPCODE_I2D:
727    case TGSI_OPCODE_U2D:
728    case TGSI_OPCODE_F2D:
729    case TGSI_OPCODE_I642D:
730    case TGSI_OPCODE_U642D:
731       return nv50_ir::TYPE_F64;
732    case TGSI_OPCODE_I2I64:
733    case TGSI_OPCODE_U2I64:
734    case TGSI_OPCODE_F2I64:
735    case TGSI_OPCODE_D2I64:
736       return nv50_ir::TYPE_S64;
737    case TGSI_OPCODE_F2U64:
738    case TGSI_OPCODE_D2U64:
739       return nv50_ir::TYPE_U64;
740    default:
741       return inferSrcType();
742    }
743 }
744 
getSetCond() const745 nv50_ir::CondCode Instruction::getSetCond() const
746 {
747    using namespace nv50_ir;
748 
749    switch (getOpcode()) {
750    case TGSI_OPCODE_SLT:
751    case TGSI_OPCODE_ISLT:
752    case TGSI_OPCODE_USLT:
753    case TGSI_OPCODE_FSLT:
754    case TGSI_OPCODE_DSLT:
755    case TGSI_OPCODE_I64SLT:
756    case TGSI_OPCODE_U64SLT:
757       return CC_LT;
758    case TGSI_OPCODE_SLE:
759       return CC_LE;
760    case TGSI_OPCODE_SGE:
761    case TGSI_OPCODE_ISGE:
762    case TGSI_OPCODE_USGE:
763    case TGSI_OPCODE_FSGE:
764    case TGSI_OPCODE_DSGE:
765    case TGSI_OPCODE_I64SGE:
766    case TGSI_OPCODE_U64SGE:
767       return CC_GE;
768    case TGSI_OPCODE_SGT:
769       return CC_GT;
770    case TGSI_OPCODE_SEQ:
771    case TGSI_OPCODE_USEQ:
772    case TGSI_OPCODE_FSEQ:
773    case TGSI_OPCODE_DSEQ:
774    case TGSI_OPCODE_U64SEQ:
775       return CC_EQ;
776    case TGSI_OPCODE_SNE:
777    case TGSI_OPCODE_FSNE:
778    case TGSI_OPCODE_DSNE:
779    case TGSI_OPCODE_U64SNE:
780       return CC_NEU;
781    case TGSI_OPCODE_USNE:
782       return CC_NE;
783    default:
784       return CC_ALWAYS;
785    }
786 }
787 
788 #define NV50_IR_OPCODE_CASE(a, b) case TGSI_OPCODE_##a: return nv50_ir::OP_##b
789 
translateOpcode(uint opcode)790 static nv50_ir::operation translateOpcode(uint opcode)
791 {
792    switch (opcode) {
793    NV50_IR_OPCODE_CASE(ARL, SHL);
794    NV50_IR_OPCODE_CASE(MOV, MOV);
795 
796    NV50_IR_OPCODE_CASE(RCP, RCP);
797    NV50_IR_OPCODE_CASE(RSQ, RSQ);
798    NV50_IR_OPCODE_CASE(SQRT, SQRT);
799 
800    NV50_IR_OPCODE_CASE(MUL, MUL);
801    NV50_IR_OPCODE_CASE(ADD, ADD);
802 
803    NV50_IR_OPCODE_CASE(MIN, MIN);
804    NV50_IR_OPCODE_CASE(MAX, MAX);
805    NV50_IR_OPCODE_CASE(SLT, SET);
806    NV50_IR_OPCODE_CASE(SGE, SET);
807    NV50_IR_OPCODE_CASE(MAD, MAD);
808    NV50_IR_OPCODE_CASE(FMA, FMA);
809 
810    NV50_IR_OPCODE_CASE(FLR, FLOOR);
811    NV50_IR_OPCODE_CASE(ROUND, CVT);
812    NV50_IR_OPCODE_CASE(EX2, EX2);
813    NV50_IR_OPCODE_CASE(LG2, LG2);
814    NV50_IR_OPCODE_CASE(POW, POW);
815 
816    NV50_IR_OPCODE_CASE(COS, COS);
817    NV50_IR_OPCODE_CASE(DDX, DFDX);
818    NV50_IR_OPCODE_CASE(DDX_FINE, DFDX);
819    NV50_IR_OPCODE_CASE(DDY, DFDY);
820    NV50_IR_OPCODE_CASE(DDY_FINE, DFDY);
821    NV50_IR_OPCODE_CASE(KILL, DISCARD);
822 
823    NV50_IR_OPCODE_CASE(SEQ, SET);
824    NV50_IR_OPCODE_CASE(SGT, SET);
825    NV50_IR_OPCODE_CASE(SIN, SIN);
826    NV50_IR_OPCODE_CASE(SLE, SET);
827    NV50_IR_OPCODE_CASE(SNE, SET);
828    NV50_IR_OPCODE_CASE(TEX, TEX);
829    NV50_IR_OPCODE_CASE(TXD, TXD);
830    NV50_IR_OPCODE_CASE(TXP, TEX);
831 
832    NV50_IR_OPCODE_CASE(CAL, CALL);
833    NV50_IR_OPCODE_CASE(RET, RET);
834    NV50_IR_OPCODE_CASE(CMP, SLCT);
835 
836    NV50_IR_OPCODE_CASE(TXB, TXB);
837 
838    NV50_IR_OPCODE_CASE(DIV, DIV);
839 
840    NV50_IR_OPCODE_CASE(TXL, TXL);
841    NV50_IR_OPCODE_CASE(TEX_LZ, TXL);
842 
843    NV50_IR_OPCODE_CASE(CEIL, CEIL);
844    NV50_IR_OPCODE_CASE(I2F, CVT);
845    NV50_IR_OPCODE_CASE(NOT, NOT);
846    NV50_IR_OPCODE_CASE(TRUNC, TRUNC);
847    NV50_IR_OPCODE_CASE(SHL, SHL);
848 
849    NV50_IR_OPCODE_CASE(AND, AND);
850    NV50_IR_OPCODE_CASE(OR, OR);
851    NV50_IR_OPCODE_CASE(MOD, MOD);
852    NV50_IR_OPCODE_CASE(XOR, XOR);
853    NV50_IR_OPCODE_CASE(TXF, TXF);
854    NV50_IR_OPCODE_CASE(TXF_LZ, TXF);
855    NV50_IR_OPCODE_CASE(TXQ, TXQ);
856    NV50_IR_OPCODE_CASE(TXQS, TXQ);
857    NV50_IR_OPCODE_CASE(TG4, TXG);
858    NV50_IR_OPCODE_CASE(LODQ, TXLQ);
859 
860    NV50_IR_OPCODE_CASE(EMIT, EMIT);
861    NV50_IR_OPCODE_CASE(ENDPRIM, RESTART);
862 
863    NV50_IR_OPCODE_CASE(KILL_IF, DISCARD);
864 
865    NV50_IR_OPCODE_CASE(F2I, CVT);
866    NV50_IR_OPCODE_CASE(FSEQ, SET);
867    NV50_IR_OPCODE_CASE(FSGE, SET);
868    NV50_IR_OPCODE_CASE(FSLT, SET);
869    NV50_IR_OPCODE_CASE(FSNE, SET);
870    NV50_IR_OPCODE_CASE(IDIV, DIV);
871    NV50_IR_OPCODE_CASE(IMAX, MAX);
872    NV50_IR_OPCODE_CASE(IMIN, MIN);
873    NV50_IR_OPCODE_CASE(IABS, ABS);
874    NV50_IR_OPCODE_CASE(INEG, NEG);
875    NV50_IR_OPCODE_CASE(ISGE, SET);
876    NV50_IR_OPCODE_CASE(ISHR, SHR);
877    NV50_IR_OPCODE_CASE(ISLT, SET);
878    NV50_IR_OPCODE_CASE(F2U, CVT);
879    NV50_IR_OPCODE_CASE(U2F, CVT);
880    NV50_IR_OPCODE_CASE(UADD, ADD);
881    NV50_IR_OPCODE_CASE(UDIV, DIV);
882    NV50_IR_OPCODE_CASE(UMAD, MAD);
883    NV50_IR_OPCODE_CASE(UMAX, MAX);
884    NV50_IR_OPCODE_CASE(UMIN, MIN);
885    NV50_IR_OPCODE_CASE(UMOD, MOD);
886    NV50_IR_OPCODE_CASE(UMUL, MUL);
887    NV50_IR_OPCODE_CASE(USEQ, SET);
888    NV50_IR_OPCODE_CASE(USGE, SET);
889    NV50_IR_OPCODE_CASE(USHR, SHR);
890    NV50_IR_OPCODE_CASE(USLT, SET);
891    NV50_IR_OPCODE_CASE(USNE, SET);
892 
893    NV50_IR_OPCODE_CASE(DABS, ABS);
894    NV50_IR_OPCODE_CASE(DNEG, NEG);
895    NV50_IR_OPCODE_CASE(DADD, ADD);
896    NV50_IR_OPCODE_CASE(DMUL, MUL);
897    NV50_IR_OPCODE_CASE(DDIV, DIV);
898    NV50_IR_OPCODE_CASE(DMAX, MAX);
899    NV50_IR_OPCODE_CASE(DMIN, MIN);
900    NV50_IR_OPCODE_CASE(DSLT, SET);
901    NV50_IR_OPCODE_CASE(DSGE, SET);
902    NV50_IR_OPCODE_CASE(DSEQ, SET);
903    NV50_IR_OPCODE_CASE(DSNE, SET);
904    NV50_IR_OPCODE_CASE(DRCP, RCP);
905    NV50_IR_OPCODE_CASE(DSQRT, SQRT);
906    NV50_IR_OPCODE_CASE(DMAD, MAD);
907    NV50_IR_OPCODE_CASE(DFMA, FMA);
908    NV50_IR_OPCODE_CASE(D2I, CVT);
909    NV50_IR_OPCODE_CASE(D2U, CVT);
910    NV50_IR_OPCODE_CASE(I2D, CVT);
911    NV50_IR_OPCODE_CASE(U2D, CVT);
912    NV50_IR_OPCODE_CASE(DRSQ, RSQ);
913    NV50_IR_OPCODE_CASE(DTRUNC, TRUNC);
914    NV50_IR_OPCODE_CASE(DCEIL, CEIL);
915    NV50_IR_OPCODE_CASE(DFLR, FLOOR);
916    NV50_IR_OPCODE_CASE(DROUND, CVT);
917 
918    NV50_IR_OPCODE_CASE(U64SEQ, SET);
919    NV50_IR_OPCODE_CASE(U64SNE, SET);
920    NV50_IR_OPCODE_CASE(U64SLT, SET);
921    NV50_IR_OPCODE_CASE(U64SGE, SET);
922    NV50_IR_OPCODE_CASE(I64SLT, SET);
923    NV50_IR_OPCODE_CASE(I64SGE, SET);
924    NV50_IR_OPCODE_CASE(I2I64, CVT);
925    NV50_IR_OPCODE_CASE(U2I64, CVT);
926    NV50_IR_OPCODE_CASE(F2I64, CVT);
927    NV50_IR_OPCODE_CASE(F2U64, CVT);
928    NV50_IR_OPCODE_CASE(D2I64, CVT);
929    NV50_IR_OPCODE_CASE(D2U64, CVT);
930    NV50_IR_OPCODE_CASE(I642F, CVT);
931    NV50_IR_OPCODE_CASE(U642F, CVT);
932    NV50_IR_OPCODE_CASE(I642D, CVT);
933    NV50_IR_OPCODE_CASE(U642D, CVT);
934 
935    NV50_IR_OPCODE_CASE(I64MIN, MIN);
936    NV50_IR_OPCODE_CASE(U64MIN, MIN);
937    NV50_IR_OPCODE_CASE(I64MAX, MAX);
938    NV50_IR_OPCODE_CASE(U64MAX, MAX);
939    NV50_IR_OPCODE_CASE(I64ABS, ABS);
940    NV50_IR_OPCODE_CASE(I64NEG, NEG);
941    NV50_IR_OPCODE_CASE(U64ADD, ADD);
942    NV50_IR_OPCODE_CASE(U64MUL, MUL);
943    NV50_IR_OPCODE_CASE(U64SHL, SHL);
944    NV50_IR_OPCODE_CASE(I64SHR, SHR);
945    NV50_IR_OPCODE_CASE(U64SHR, SHR);
946 
947    NV50_IR_OPCODE_CASE(IMUL_HI, MUL);
948    NV50_IR_OPCODE_CASE(UMUL_HI, MUL);
949 
950    NV50_IR_OPCODE_CASE(SAMPLE, TEX);
951    NV50_IR_OPCODE_CASE(SAMPLE_B, TXB);
952    NV50_IR_OPCODE_CASE(SAMPLE_C, TEX);
953    NV50_IR_OPCODE_CASE(SAMPLE_C_LZ, TEX);
954    NV50_IR_OPCODE_CASE(SAMPLE_D, TXD);
955    NV50_IR_OPCODE_CASE(SAMPLE_L, TXL);
956    NV50_IR_OPCODE_CASE(SAMPLE_I, TXF);
957    NV50_IR_OPCODE_CASE(SAMPLE_I_MS, TXF);
958    NV50_IR_OPCODE_CASE(GATHER4, TXG);
959    NV50_IR_OPCODE_CASE(SVIEWINFO, TXQ);
960 
961    NV50_IR_OPCODE_CASE(ATOMUADD, ATOM);
962    NV50_IR_OPCODE_CASE(ATOMXCHG, ATOM);
963    NV50_IR_OPCODE_CASE(ATOMCAS, ATOM);
964    NV50_IR_OPCODE_CASE(ATOMAND, ATOM);
965    NV50_IR_OPCODE_CASE(ATOMOR, ATOM);
966    NV50_IR_OPCODE_CASE(ATOMXOR, ATOM);
967    NV50_IR_OPCODE_CASE(ATOMUMIN, ATOM);
968    NV50_IR_OPCODE_CASE(ATOMUMAX, ATOM);
969    NV50_IR_OPCODE_CASE(ATOMIMIN, ATOM);
970    NV50_IR_OPCODE_CASE(ATOMIMAX, ATOM);
971 
972    NV50_IR_OPCODE_CASE(TEX2, TEX);
973    NV50_IR_OPCODE_CASE(TXB2, TXB);
974    NV50_IR_OPCODE_CASE(TXL2, TXL);
975 
976    NV50_IR_OPCODE_CASE(IBFE, EXTBF);
977    NV50_IR_OPCODE_CASE(UBFE, EXTBF);
978    NV50_IR_OPCODE_CASE(BFI, INSBF);
979    NV50_IR_OPCODE_CASE(BREV, EXTBF);
980    NV50_IR_OPCODE_CASE(POPC, POPCNT);
981    NV50_IR_OPCODE_CASE(LSB, BFIND);
982    NV50_IR_OPCODE_CASE(IMSB, BFIND);
983    NV50_IR_OPCODE_CASE(UMSB, BFIND);
984 
985    NV50_IR_OPCODE_CASE(VOTE_ALL, VOTE);
986    NV50_IR_OPCODE_CASE(VOTE_ANY, VOTE);
987    NV50_IR_OPCODE_CASE(VOTE_EQ, VOTE);
988 
989    NV50_IR_OPCODE_CASE(BALLOT, VOTE);
990    NV50_IR_OPCODE_CASE(READ_INVOC, SHFL);
991    NV50_IR_OPCODE_CASE(READ_FIRST, SHFL);
992 
993    NV50_IR_OPCODE_CASE(END, EXIT);
994 
995    default:
996       return nv50_ir::OP_NOP;
997    }
998 }
999 
opcodeToSubOp(uint opcode)1000 static uint16_t opcodeToSubOp(uint opcode)
1001 {
1002    switch (opcode) {
1003    case TGSI_OPCODE_ATOMUADD: return NV50_IR_SUBOP_ATOM_ADD;
1004    case TGSI_OPCODE_ATOMXCHG: return NV50_IR_SUBOP_ATOM_EXCH;
1005    case TGSI_OPCODE_ATOMCAS:  return NV50_IR_SUBOP_ATOM_CAS;
1006    case TGSI_OPCODE_ATOMAND:  return NV50_IR_SUBOP_ATOM_AND;
1007    case TGSI_OPCODE_ATOMOR:   return NV50_IR_SUBOP_ATOM_OR;
1008    case TGSI_OPCODE_ATOMXOR:  return NV50_IR_SUBOP_ATOM_XOR;
1009    case TGSI_OPCODE_ATOMUMIN: return NV50_IR_SUBOP_ATOM_MIN;
1010    case TGSI_OPCODE_ATOMIMIN: return NV50_IR_SUBOP_ATOM_MIN;
1011    case TGSI_OPCODE_ATOMUMAX: return NV50_IR_SUBOP_ATOM_MAX;
1012    case TGSI_OPCODE_ATOMIMAX: return NV50_IR_SUBOP_ATOM_MAX;
1013    case TGSI_OPCODE_IMUL_HI:
1014    case TGSI_OPCODE_UMUL_HI:
1015       return NV50_IR_SUBOP_MUL_HIGH;
1016    case TGSI_OPCODE_VOTE_ALL: return NV50_IR_SUBOP_VOTE_ALL;
1017    case TGSI_OPCODE_VOTE_ANY: return NV50_IR_SUBOP_VOTE_ANY;
1018    case TGSI_OPCODE_VOTE_EQ: return NV50_IR_SUBOP_VOTE_UNI;
1019    default:
1020       return 0;
1021    }
1022 }
1023 
checkDstSrcAliasing() const1024 bool Instruction::checkDstSrcAliasing() const
1025 {
1026    if (insn->Dst[0].Register.Indirect) // no danger if indirect, using memory
1027       return false;
1028 
1029    for (int s = 0; s < TGSI_FULL_MAX_SRC_REGISTERS; ++s) {
1030       if (insn->Src[s].Register.File == TGSI_FILE_NULL)
1031          break;
1032       if (insn->Src[s].Register.File == insn->Dst[0].Register.File &&
1033           insn->Src[s].Register.Index == insn->Dst[0].Register.Index)
1034          return true;
1035    }
1036    return false;
1037 }
1038 
// Wraps the TGSI token stream referenced by a nv50_ir_prog_info and holds
// the data gathered from it by scanSource(): a copy of every instruction,
// per-register bookkeeping for temporary arrays, sampler view targets and
// memory file types. The destructor releases the instruction copy and the
// immediate buffers stored in info.
class Source
{
public:
   Source(struct nv50_ir_prog_info *);
   ~Source();

public:
   // Parses the tokens and fills in this object and info; returns false on
   // failure.
   bool scanSource();
   // Number of registers in a TGSI file: highest index seen by the scan + 1.
   unsigned fileSize(unsigned file) const { return scan.file_max[file] + 1; }

public:
   struct tgsi_shader_info scan;         // filled by tgsi_scan_shader()
   struct tgsi_full_instruction *insns;  // MALLOC'ed copy of all instructions
   const struct tgsi_token *tokens;      // taken from info->bin.source
   struct nv50_ir_prog_info *info;       // not owned by this object

   nv50_ir::DynArray tempArrays;
   nv50_ir::DynArray immdArrays;

   typedef nv50_ir::BuildUtil::Location Location;
   // these registers are per-subroutine, cannot be used for parameter passing
   std::set<Location> locals;

   // array ids of temporary arrays that are accessed indirectly
   std::set<int> indirectTempArrays;
   // array id -> (running base of the array - its first register index),
   // computed in scanSource() for every id in indirectTempArrays
   std::map<int, int> indirectTempOffsets;
   // array id -> (first register index, number of registers)
   std::map<int, std::pair<int, int> > tempArrayInfo;
   // array id of each temporary register, indexed by register index
   std::vector<int> tempArrayId;

   // output index recorded for POSITION (non-fragment shaders) or
   // CLIPVERTEX; -1 if none was declared
   int clipVertexOutput;

   struct TextureView {
      uint8_t target; // TGSI_TEXTURE_*
   };
   // indexed by sampler view register index
   std::vector<TextureView> textureViews;

   /*
   struct Resource {
      uint8_t target; // TGSI_TEXTURE_*
      bool raw;
      uint8_t slot; // $surface index
   };
   std::vector<Resource> resources;
   */

   struct MemoryFile {
      uint8_t mem_type; // TGSI_MEMORY_TYPE_*
   };
   // indexed by memory register index
   std::vector<MemoryFile> memoryFiles;

private:
   // Helpers used by scanSource(), one per kind of token.
   int inferSysValDirection(unsigned sn) const;
   bool scanDeclaration(const struct tgsi_full_declaration *);
   bool scanInstruction(const struct tgsi_full_instruction *);
   void scanInstructionSrc(const Instruction& insn,
                           const Instruction::SrcRegister& src,
                           unsigned mask);
   void scanProperty(const struct tgsi_full_property *);
   void scanImmediate(const struct tgsi_full_immediate *);

   inline bool isEdgeFlagPassthrough(const Instruction&) const;
};
1100 
Source(struct nv50_ir_prog_info * prog)1101 Source::Source(struct nv50_ir_prog_info *prog) : info(prog)
1102 {
1103    tokens = (const struct tgsi_token *)info->bin.source;
1104 
1105    if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
1106       tgsi_dump(tokens, 0);
1107 }
1108 
~Source()1109 Source::~Source()
1110 {
1111    if (insns)
1112       FREE(insns);
1113 
1114    if (info->immd.data)
1115       FREE(info->immd.data);
1116    if (info->immd.type)
1117       FREE(info->immd.type);
1118 }
1119 
scanSource()1120 bool Source::scanSource()
1121 {
1122    unsigned insnCount = 0;
1123    struct tgsi_parse_context parse;
1124 
1125    tgsi_scan_shader(tokens, &scan);
1126 
1127    insns = (struct tgsi_full_instruction *)MALLOC(scan.num_instructions *
1128                                                   sizeof(insns[0]));
1129    if (!insns)
1130       return false;
1131 
1132    clipVertexOutput = -1;
1133 
1134    textureViews.resize(scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1);
1135    //resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1);
1136    tempArrayId.resize(scan.file_max[TGSI_FILE_TEMPORARY] + 1);
1137    memoryFiles.resize(scan.file_max[TGSI_FILE_MEMORY] + 1);
1138 
1139    info->immd.bufSize = 0;
1140 
1141    info->numInputs = scan.file_max[TGSI_FILE_INPUT] + 1;
1142    info->numOutputs = scan.file_max[TGSI_FILE_OUTPUT] + 1;
1143    info->numSysVals = scan.file_max[TGSI_FILE_SYSTEM_VALUE] + 1;
1144 
1145    if (info->type == PIPE_SHADER_FRAGMENT) {
1146       info->prop.fp.writesDepth = scan.writes_z;
1147       info->prop.fp.usesDiscard = scan.uses_kill || info->io.alphaRefBase;
1148    } else
1149    if (info->type == PIPE_SHADER_GEOMETRY) {
1150       info->prop.gp.instanceCount = 1; // default value
1151    }
1152 
1153    info->io.viewportId = -1;
1154 
1155    info->immd.data = (uint32_t *)MALLOC(scan.immediate_count * 16);
1156    info->immd.type = (ubyte *)MALLOC(scan.immediate_count * sizeof(ubyte));
1157 
1158    tgsi_parse_init(&parse, tokens);
1159    while (!tgsi_parse_end_of_tokens(&parse)) {
1160       tgsi_parse_token(&parse);
1161 
1162       switch (parse.FullToken.Token.Type) {
1163       case TGSI_TOKEN_TYPE_IMMEDIATE:
1164          scanImmediate(&parse.FullToken.FullImmediate);
1165          break;
1166       case TGSI_TOKEN_TYPE_DECLARATION:
1167          scanDeclaration(&parse.FullToken.FullDeclaration);
1168          break;
1169       case TGSI_TOKEN_TYPE_INSTRUCTION:
1170          insns[insnCount++] = parse.FullToken.FullInstruction;
1171          scanInstruction(&parse.FullToken.FullInstruction);
1172          break;
1173       case TGSI_TOKEN_TYPE_PROPERTY:
1174          scanProperty(&parse.FullToken.FullProperty);
1175          break;
1176       default:
1177          INFO("unknown TGSI token type: %d\n", parse.FullToken.Token.Type);
1178          break;
1179       }
1180    }
1181    tgsi_parse_free(&parse);
1182 
1183    if (indirectTempArrays.size()) {
1184       int tempBase = 0;
1185       for (std::set<int>::const_iterator it = indirectTempArrays.begin();
1186            it != indirectTempArrays.end(); ++it) {
1187          std::pair<int, int>& info = tempArrayInfo[*it];
1188          indirectTempOffsets.insert(std::make_pair(*it, tempBase - info.first));
1189          tempBase += info.second;
1190       }
1191       info->bin.tlsSpace += tempBase * 16;
1192    }
1193 
1194    if (info->io.genUserClip > 0) {
1195       info->io.clipDistances = info->io.genUserClip;
1196 
1197       const unsigned int nOut = (info->io.genUserClip + 3) / 4;
1198 
1199       for (unsigned int n = 0; n < nOut; ++n) {
1200          unsigned int i = info->numOutputs++;
1201          info->out[i].id = i;
1202          info->out[i].sn = TGSI_SEMANTIC_CLIPDIST;
1203          info->out[i].si = n;
1204          info->out[i].mask = ((1 << info->io.clipDistances) - 1) >> (n * 4);
1205       }
1206    }
1207 
1208    return info->assignSlots(info) == 0;
1209 }
1210 
scanProperty(const struct tgsi_full_property * prop)1211 void Source::scanProperty(const struct tgsi_full_property *prop)
1212 {
1213    switch (prop->Property.PropertyName) {
1214    case TGSI_PROPERTY_GS_OUTPUT_PRIM:
1215       info->prop.gp.outputPrim = prop->u[0].Data;
1216       break;
1217    case TGSI_PROPERTY_GS_INPUT_PRIM:
1218       info->prop.gp.inputPrim = prop->u[0].Data;
1219       break;
1220    case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
1221       info->prop.gp.maxVertices = prop->u[0].Data;
1222       break;
1223    case TGSI_PROPERTY_GS_INVOCATIONS:
1224       info->prop.gp.instanceCount = prop->u[0].Data;
1225       break;
1226    case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
1227       info->prop.fp.separateFragData = true;
1228       break;
1229    case TGSI_PROPERTY_FS_COORD_ORIGIN:
1230    case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
1231    case TGSI_PROPERTY_FS_DEPTH_LAYOUT:
1232       // we don't care
1233       break;
1234    case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
1235       info->io.genUserClip = -1;
1236       break;
1237    case TGSI_PROPERTY_TCS_VERTICES_OUT:
1238       info->prop.tp.outputPatchSize = prop->u[0].Data;
1239       break;
1240    case TGSI_PROPERTY_TES_PRIM_MODE:
1241       info->prop.tp.domain = prop->u[0].Data;
1242       break;
1243    case TGSI_PROPERTY_TES_SPACING:
1244       info->prop.tp.partitioning = prop->u[0].Data;
1245       break;
1246    case TGSI_PROPERTY_TES_VERTEX_ORDER_CW:
1247       info->prop.tp.winding = prop->u[0].Data;
1248       break;
1249    case TGSI_PROPERTY_TES_POINT_MODE:
1250       if (prop->u[0].Data)
1251          info->prop.tp.outputPrim = PIPE_PRIM_POINTS;
1252       else
1253          info->prop.tp.outputPrim = PIPE_PRIM_TRIANGLES; /* anything but points */
1254       break;
1255    case TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH:
1256       info->prop.cp.numThreads[0] = prop->u[0].Data;
1257       break;
1258    case TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT:
1259       info->prop.cp.numThreads[1] = prop->u[0].Data;
1260       break;
1261    case TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH:
1262       info->prop.cp.numThreads[2] = prop->u[0].Data;
1263       break;
1264    case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
1265       info->io.clipDistances = prop->u[0].Data;
1266       break;
1267    case TGSI_PROPERTY_NUM_CULLDIST_ENABLED:
1268       info->io.cullDistances = prop->u[0].Data;
1269       break;
1270    case TGSI_PROPERTY_NEXT_SHADER:
1271       /* Do not need to know the next shader stage. */
1272       break;
1273    case TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL:
1274       info->prop.fp.earlyFragTests = prop->u[0].Data;
1275       break;
1276    case TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE:
1277       info->prop.fp.postDepthCoverage = prop->u[0].Data;
1278       break;
1279    case TGSI_PROPERTY_MUL_ZERO_WINS:
1280       info->io.mul_zero_wins = prop->u[0].Data;
1281       break;
1282    default:
1283       INFO("unhandled TGSI property %d\n", prop->Property.PropertyName);
1284       break;
1285    }
1286 }
1287 
scanImmediate(const struct tgsi_full_immediate * imm)1288 void Source::scanImmediate(const struct tgsi_full_immediate *imm)
1289 {
1290    const unsigned n = info->immd.count++;
1291 
1292    assert(n < scan.immediate_count);
1293 
1294    for (int c = 0; c < 4; ++c)
1295       info->immd.data[n * 4 + c] = imm->u[c].Uint;
1296 
1297    info->immd.type[n] = imm->Immediate.DataType;
1298 }
1299 
inferSysValDirection(unsigned sn) const1300 int Source::inferSysValDirection(unsigned sn) const
1301 {
1302    switch (sn) {
1303    case TGSI_SEMANTIC_INSTANCEID:
1304    case TGSI_SEMANTIC_VERTEXID:
1305       return 1;
1306    case TGSI_SEMANTIC_LAYER:
1307 #if 0
1308    case TGSI_SEMANTIC_VIEWPORTINDEX:
1309       return 0;
1310 #endif
1311    case TGSI_SEMANTIC_PRIMID:
1312       return (info->type == PIPE_SHADER_FRAGMENT) ? 1 : 0;
1313    default:
1314       return 0;
1315    }
1316 }
1317 
/**
 * Record one TGSI declaration.
 *
 * Depending on the declared register file this fills the in/out/sv tables
 * of the program info, notes special outputs (depth, clip vertex, edge flag,
 * sample mask, viewport), and updates the sampler view, memory and
 * temporary-array tables of this Source.
 *
 * \return false for a register file that is not handled here, true otherwise
 */
bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
{
   unsigned i, c;
   unsigned sn = TGSI_SEMANTIC_GENERIC;
   unsigned si = 0;
   const unsigned first = decl->Range.First, last = decl->Range.Last;
   const int arrayId = decl->Array.ArrayID;

   // Without an explicit semantic the range is treated as GENERIC, index 0.
   if (decl->Declaration.Semantic) {
      sn = decl->Semantic.Name;
      si = decl->Semantic.Index;
   }

   // Local declarations and all address registers are per-subroutine: add
   // every component of every register in the range to 'locals'.
   if (decl->Declaration.Local || decl->Declaration.File == TGSI_FILE_ADDRESS) {
      for (i = first; i <= last; ++i) {
         for (c = 0; c < 4; ++c) {
            locals.insert(
               Location(decl->Declaration.File, decl->Dim.Index2D, i, c));
         }
      }
   }

   switch (decl->Declaration.File) {
   case TGSI_FILE_INPUT:
      if (info->type == PIPE_SHADER_VERTEX) {
         // all vertex attributes are equal
         for (i = first; i <= last; ++i) {
            info->in[i].sn = TGSI_SEMANTIC_GENERIC;
            info->in[i].si = i;
         }
      } else {
         // The semantic index advances together with the register index.
         for (i = first; i <= last; ++i, ++si) {
            info->in[i].id = i;
            info->in[i].sn = sn;
            info->in[i].si = si;
            if (info->type == PIPE_SHADER_FRAGMENT) {
               // translate interpolation mode
               switch (decl->Interp.Interpolate) {
               case TGSI_INTERPOLATE_CONSTANT:
                  info->in[i].flat = 1;
                  break;
               case TGSI_INTERPOLATE_COLOR:
                  info->in[i].sc = 1;
                  break;
               case TGSI_INTERPOLATE_LINEAR:
                  info->in[i].linear = 1;
                  break;
               default:
                  break;
               }
               // any non-zero interpolation location sets the centroid flag
               if (decl->Interp.Location)
                  info->in[i].centroid = 1;
            }

            if (sn == TGSI_SEMANTIC_PATCH)
               info->in[i].patch = 1;
            // numPatchConstants tracks the highest PATCH semantic index + 1
            if (sn == TGSI_SEMANTIC_PATCH)
               info->numPatchConstants = MAX2(info->numPatchConstants, si + 1);
         }
      }
      break;
   case TGSI_FILE_OUTPUT:
      for (i = first; i <= last; ++i, ++si) {
         switch (sn) {
         case TGSI_SEMANTIC_POSITION:
            // In a fragment shader POSITION is the depth output; elsewhere
            // it becomes the clip vertex unless one was already recorded.
            if (info->type == PIPE_SHADER_FRAGMENT)
               info->io.fragDepth = i;
            else
            if (clipVertexOutput < 0)
               clipVertexOutput = i;
            break;
         case TGSI_SEMANTIC_COLOR:
            if (info->type == PIPE_SHADER_FRAGMENT)
               info->prop.fp.numColourResults++;
            break;
         case TGSI_SEMANTIC_EDGEFLAG:
            info->io.edgeFlagOut = i;
            break;
         case TGSI_SEMANTIC_CLIPVERTEX:
            // an explicit CLIPVERTEX always replaces the recorded output
            clipVertexOutput = i;
            break;
         case TGSI_SEMANTIC_CLIPDIST:
            // -1 keeps scanSource() from appending generated CLIPDIST outputs
            info->io.genUserClip = -1;
            break;
         case TGSI_SEMANTIC_SAMPLEMASK:
            info->io.sampleMask = i;
            break;
         case TGSI_SEMANTIC_VIEWPORT_INDEX:
            info->io.viewportId = i;
            break;
         case TGSI_SEMANTIC_PATCH:
            info->numPatchConstants = MAX2(info->numPatchConstants, si + 1);
            /* fallthrough */
         case TGSI_SEMANTIC_TESSOUTER:
         case TGSI_SEMANTIC_TESSINNER:
            info->out[i].patch = 1;
            break;
         default:
            break;
         }
         info->out[i].id = i;
         info->out[i].sn = sn;
         info->out[i].si = si;
      }
      break;
   case TGSI_FILE_SYSTEM_VALUE:
      // Flags that depend only on the semantic, set once per declaration.
      switch (sn) {
      case TGSI_SEMANTIC_INSTANCEID:
         info->io.instanceId = first;
         break;
      case TGSI_SEMANTIC_VERTEXID:
         info->io.vertexId = first;
         break;
      case TGSI_SEMANTIC_BASEVERTEX:
      case TGSI_SEMANTIC_BASEINSTANCE:
      case TGSI_SEMANTIC_DRAWID:
         info->prop.vp.usesDrawParameters = true;
         break;
      case TGSI_SEMANTIC_SAMPLEID:
      case TGSI_SEMANTIC_SAMPLEPOS:
         info->prop.fp.persampleInvocation = true;
         break;
      case TGSI_SEMANTIC_SAMPLEMASK:
         info->prop.fp.usesSampleMaskIn = true;
         break;
      default:
         break;
      }
      // Per-register entries of the system value table.
      for (i = first; i <= last; ++i, ++si) {
         info->sv[i].sn = sn;
         info->sv[i].si = si;
         info->sv[i].input = inferSysValDirection(sn);

         switch (sn) {
         case TGSI_SEMANTIC_TESSOUTER:
         case TGSI_SEMANTIC_TESSINNER:
            info->sv[i].patch = 1;
            break;
         }
      }
      break;
/*
   case TGSI_FILE_RESOURCE:
      for (i = first; i <= last; ++i) {
         resources[i].target = decl->Resource.Resource;
         resources[i].raw = decl->Resource.Raw;
         resources[i].slot = i;
      }
      break;
*/
   case TGSI_FILE_SAMPLER_VIEW:
      for (i = first; i <= last; ++i)
         textureViews[i].target = decl->SamplerView.Resource;
      break;
   case TGSI_FILE_MEMORY:
      for (i = first; i <= last; ++i)
         memoryFiles[i].mem_type = decl->Declaration.MemType;
      break;
   // NOTE(review): TGSI_FILE_NULL shares the TEMPORARY handling below and so
   // writes tempArrayId, which is sized from the TEMPORARY file only --
   // confirm that this grouping is intended.
   case TGSI_FILE_NULL:
   case TGSI_FILE_TEMPORARY:
      for (i = first; i <= last; ++i)
         tempArrayId[i] = arrayId;
      // A non-zero array id registers the range as (first, length).
      if (arrayId)
         tempArrayInfo.insert(std::make_pair(arrayId, std::make_pair(
                                                   first, last - first + 1)));
      break;
   // Files that need no bookkeeping in this function.
   case TGSI_FILE_ADDRESS:
   case TGSI_FILE_CONSTANT:
   case TGSI_FILE_IMMEDIATE:
   case TGSI_FILE_SAMPLER:
   case TGSI_FILE_BUFFER:
   case TGSI_FILE_IMAGE:
      break;
   default:
      ERROR("unhandled TGSI_FILE %d\n", decl->Declaration.File);
      return false;
   }
   return true;
}
1497 
isEdgeFlagPassthrough(const Instruction & insn) const1498 inline bool Source::isEdgeFlagPassthrough(const Instruction& insn) const
1499 {
1500    return insn.getOpcode() == TGSI_OPCODE_MOV &&
1501       insn.getDst(0).getIndex(0) == info->io.edgeFlagOut &&
1502       insn.getSrc(0).getFile() == TGSI_FILE_INPUT;
1503 }
1504 
scanInstructionSrc(const Instruction & insn,const Instruction::SrcRegister & src,unsigned mask)1505 void Source::scanInstructionSrc(const Instruction& insn,
1506                                 const Instruction::SrcRegister& src,
1507                                 unsigned mask)
1508 {
1509    if (src.getFile() == TGSI_FILE_TEMPORARY) {
1510       if (src.isIndirect(0))
1511          indirectTempArrays.insert(src.getArrayId());
1512    } else
1513    if (src.getFile() == TGSI_FILE_OUTPUT) {
1514       if (src.isIndirect(0)) {
1515          // We don't know which one is accessed, just mark everything for
1516          // reading. This is an extremely unlikely occurrence.
1517          for (unsigned i = 0; i < info->numOutputs; ++i)
1518             info->out[i].oread = 1;
1519       } else {
1520          info->out[src.getIndex(0)].oread = 1;
1521       }
1522    }
1523    if (src.getFile() != TGSI_FILE_INPUT)
1524       return;
1525 
1526    if (src.isIndirect(0)) {
1527       for (unsigned i = 0; i < info->numInputs; ++i)
1528          info->in[i].mask = 0xf;
1529    } else {
1530       const int i = src.getIndex(0);
1531       for (unsigned c = 0; c < 4; ++c) {
1532          if (!(mask & (1 << c)))
1533             continue;
1534          int k = src.getSwizzle(c);
1535          if (k <= TGSI_SWIZZLE_W)
1536             info->in[i].mask |= 1 << k;
1537       }
1538       switch (info->in[i].sn) {
1539       case TGSI_SEMANTIC_PSIZE:
1540       case TGSI_SEMANTIC_PRIMID:
1541       case TGSI_SEMANTIC_FOG:
1542          info->in[i].mask &= 0x1;
1543          break;
1544       case TGSI_SEMANTIC_PCOORD:
1545          info->in[i].mask &= 0x3;
1546          break;
1547       default:
1548          break;
1549       }
1550    }
1551 }
1552 
/**
 * Record the effects of one instruction in the program info: barrier and
 * framebuffer-fetch usage, output write masks, the edge flag passthrough
 * input, indirectly written temporary arrays and global memory access, and
 * then the usage of every source operand and texture offset.
 *
 * \return always true
 */
bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
{
   Instruction insn(inst);

   if (insn.getOpcode() == TGSI_OPCODE_BARRIER)
      info->numBarriers = 1;

   if (insn.getOpcode() == TGSI_OPCODE_FBFETCH)
      info->prop.fp.readsFramebuffer = true;

   // Only the first destination is examined.
   if (insn.dstCount()) {
      Instruction::DstRegister dst = insn.getDst(0);

      if (dst.getFile() == TGSI_FILE_OUTPUT) {
         // An indirect write may hit any output, so all are fully written.
         if (dst.isIndirect(0))
            for (unsigned i = 0; i < info->numOutputs; ++i)
               info->out[i].mask = 0xf;
         else
            info->out[dst.getIndex(0)].mask |= dst.getMask();

         // For these semantics only the first component is kept.
         if (info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PSIZE ||
             info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PRIMID ||
             info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_LAYER ||
             info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_VIEWPORT_INDEX ||
             info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_FOG)
            info->out[dst.getIndex(0)].mask &= 1;

         // Remember which input feeds the edge flag output.
         if (isEdgeFlagPassthrough(insn))
            info->io.edgeFlagIn = insn.getSrc(0).getIndex(0);
      } else
      if (dst.getFile() != TGSI_FILE_MEMORY &&
          insn.getOpcode() == TGSI_OPCODE_STORE) {
         // bit 0x2 of globalAccess is set for stores and atomics
         info->io.globalAccess |= 0x2;
      } else
      if (dst.getFile() == TGSI_FILE_TEMPORARY) {
         if (dst.isIndirect(0))
            indirectTempArrays.insert(dst.getArrayId());
      } else
      if (dst.getFile() == TGSI_FILE_BUFFER ||
          dst.getFile() == TGSI_FILE_IMAGE ||
          (dst.getFile() == TGSI_FILE_MEMORY &&
           memoryFiles[dst.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) {
         info->io.globalAccess |= 0x2;
      }
   }

   // Loads and atomics count as global access unless the first source is a
   // MEMORY file whose type is not GLOBAL.
   if (insn.srcCount() && (
             insn.getSrc(0).getFile() != TGSI_FILE_MEMORY ||
             memoryFiles[insn.getSrc(0).getIndex(0)].mem_type ==
             TGSI_MEMORY_TYPE_GLOBAL)) {
      switch (insn.getOpcode()) {
      case TGSI_OPCODE_ATOMUADD:
      case TGSI_OPCODE_ATOMXCHG:
      case TGSI_OPCODE_ATOMCAS:
      case TGSI_OPCODE_ATOMAND:
      case TGSI_OPCODE_ATOMOR:
      case TGSI_OPCODE_ATOMXOR:
      case TGSI_OPCODE_ATOMUMIN:
      case TGSI_OPCODE_ATOMIMIN:
      case TGSI_OPCODE_ATOMUMAX:
      case TGSI_OPCODE_ATOMIMAX:
      case TGSI_OPCODE_LOAD:
         // LOAD sets bit 0x1, every atomic sets bit 0x2
         info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
            0x1 : 0x2;
         break;
      }
   }


   for (unsigned s = 0; s < insn.srcCount(); ++s)
      scanInstructionSrc(insn, insn.getSrc(s), insn.srcMask(s));

   // Texture offsets are register reads too and are scanned the same way.
   for (unsigned s = 0; s < insn.getNumTexOffsets(); ++s)
      scanInstructionSrc(insn, insn.getTexOffset(s), insn.texOffsetMask());

   return true;
}
1630 
1631 nv50_ir::TexInstruction::Target
getTexture(const tgsi::Source * code,int s) const1632 Instruction::getTexture(const tgsi::Source *code, int s) const
1633 {
1634    // XXX: indirect access
1635    unsigned int r;
1636 
1637    switch (getSrc(s).getFile()) {
1638 /*
1639    case TGSI_FILE_RESOURCE:
1640       r = getSrc(s).getIndex(0);
1641       return translateTexture(code->resources.at(r).target);
1642 */
1643    case TGSI_FILE_SAMPLER_VIEW:
1644       r = getSrc(s).getIndex(0);
1645       return translateTexture(code->textureViews.at(r).target);
1646    default:
1647       return translateTexture(insn->Texture.Texture);
1648    }
1649 }
1650 
1651 } // namespace tgsi
1652 
1653 namespace {
1654 
1655 using namespace nv50_ir;
1656 
// Builds on BuildUtil and is constructed from a Program and a scanned
// tgsi::Source; run() is the public entry point. Everything else is private
// state and helpers, most of them working on the current instruction held in
// 'tgsi'.
class Converter : public BuildUtil
{
public:
   Converter(Program *, const tgsi::Source *);
   ~Converter();

   bool run();

private:
   // A Function together with its own value map.
   struct Subroutine
   {
      Subroutine(Function *f) : f(f) { }
      Function *f;
      ValueMap values;
   };

   // Address helpers and per-component access to sources and destinations;
   // s / d select the operand, c the component.
   Value *shiftAddress(Value *);
   Value *getVertexBase(int s);
   Value *getOutputBase(int s);
   DataArray *getArrayForFile(unsigned file, int idx);
   Value *fetchSrc(int s, int c);
   Value *fetchDst(int d, int c);
   Value *acquireDst(int d, int c);
   void storeDst(int d, int c, Value *);

   Value *fetchSrc(const tgsi::Instruction::SrcRegister src, int c, Value *ptr);
   void storeDst(const tgsi::Instruction::DstRegister dst, int c,
                 Value *val, Value *ptr);

   void adjustTempIndex(int arrayId, int &idx, int &idx2d) const;
   Value *applySrcMod(Value *, int s, int c);

   // Symbol construction for a TGSI register component.
   Symbol *makeSym(uint file, int fileIndex, int idx, int c, uint32_t addr);
   Symbol *srcToSym(tgsi::Instruction::SrcRegister, int c);
   Symbol *dstToSym(tgsi::Instruction::DstRegister, int c);

   bool isSubGroupMask(uint8_t semantic);

   bool handleInstruction(const struct tgsi_full_instruction *);
   void exportOutputs();
   inline Subroutine *getSubroutine(unsigned ip);
   inline Subroutine *getSubroutine(Function *);
   inline bool isEndOfSubroutine(uint ip);

   void loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask);

   // R,S,L,C,Dx,Dy encode TGSI sources for respective values (0xSf for auto)
   void setTexRS(TexInstruction *, unsigned int& s, int R, int S);
   void handleTEX(Value *dst0[4], int R, int S, int L, int C, int Dx, int Dy);
   void handleTXF(Value *dst0[4], int R, int L_M);
   void handleTXQ(Value *dst0[4], enum TexQuery, int R);
   void handleFBFETCH(Value *dst0[4]);
   void handleLIT(Value *dst0[4]);
   void handleUserClipPlanes();

   // Symbol *getResourceBase(int r);
   void getImageCoords(std::vector<Value *>&, int s);

   void handleLOAD(Value *dst0[4]);
   void handleSTORE();
   void handleATOM(Value *dst0[4], DataType, uint16_t subOp);

   void handleINTERP(Value *dst0[4]);

   uint8_t translateInterpMode(const struct nv50_ir_varying *var,
                               operation& op);
   Value *interpolate(tgsi::Instruction::SrcRegister, int c, Value *ptr);

   void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork);

   Value *buildDot(int dim);

   // Pass with access to the owning Converter. Its BasicBlock visitor
   // returns false, so the work is done per Function.
   class BindArgumentsPass : public Pass {
   public:
      BindArgumentsPass(Converter &conv) : conv(conv) { }

   private:
      Converter &conv;
      Subroutine *sub;

      inline const Location *getValueLocation(Subroutine *, Value *);

      template<typename T> inline void
      updateCallArgs(Instruction *i, void (Instruction::*setArg)(int, Value *),
                     T (Function::*proto));

      template<typename T> inline void
      updatePrototype(BitSet *set, void (Function::*updateSet)(),
                      T (Function::*proto));

   protected:
      bool visit(Function *);
      bool visit(BasicBlock *bb) { return false; }
   };

private:
   const tgsi::Source *code;              // scanned TGSI source, read-only
   const struct nv50_ir_prog_info *info;  // program info, read-only here

   struct {
      std::map<unsigned, Subroutine> map;
      Subroutine *cur;
   } sub;

   uint ip; // instruction pointer

   tgsi::Instruction tgsi;

   DataType dstTy;
   DataType srcTy;

   DataArray tData; // TGSI_FILE_TEMPORARY
   DataArray lData; // TGSI_FILE_TEMPORARY, for indirect arrays
   DataArray aData; // TGSI_FILE_ADDRESS
   DataArray oData; // TGSI_FILE_OUTPUT (if outputs in registers)

   Value *zero;
   Value *fragCoord[4];
   Value *clipVtx[4];

   Value *vtxBase[5]; // base address of vertex in primitive (for TP/GP)
   uint8_t vtxBaseValid;

   Value *outBase; // base address of vertex out patch (for TCP)

   Stack condBBs;  // fork BB, then else clause BB
   Stack joinBBs;  // fork BB, for inserting join ops on ENDIF
   Stack loopBBs;  // loop headers
   Stack breakBBs; // end of / after loop

   Value *viewport;
};
1789 
1790 Symbol *
srcToSym(tgsi::Instruction::SrcRegister src,int c)1791 Converter::srcToSym(tgsi::Instruction::SrcRegister src, int c)
1792 {
1793    const int swz = src.getSwizzle(c);
1794 
1795    /* TODO: Use Array ID when it's available for the index */
1796    return makeSym(src.getFile(),
1797                   src.is2D() ? src.getIndex(1) : 0,
1798                   src.getIndex(0), swz,
1799                   src.getIndex(0) * 16 + swz * 4);
1800 }
1801 
1802 Symbol *
dstToSym(tgsi::Instruction::DstRegister dst,int c)1803 Converter::dstToSym(tgsi::Instruction::DstRegister dst, int c)
1804 {
1805    /* TODO: Use Array ID when it's available for the index */
1806    return makeSym(dst.getFile(),
1807                   dst.is2D() ? dst.getIndex(1) : 0,
1808                   dst.getIndex(0), c,
1809                   dst.getIndex(0) * 16 + c * 4);
1810 }
1811 
1812 Symbol *
makeSym(uint tgsiFile,int fileIdx,int idx,int c,uint32_t address)1813 Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address)
1814 {
1815    Symbol *sym = new_Symbol(prog, tgsi::translateFile(tgsiFile));
1816 
1817    sym->reg.fileIndex = fileIdx;
1818 
1819    if (tgsiFile == TGSI_FILE_MEMORY) {
1820       switch (code->memoryFiles[fileIdx].mem_type) {
1821       case TGSI_MEMORY_TYPE_GLOBAL:
1822          /* No-op this is the default for TGSI_FILE_MEMORY */
1823          sym->setFile(FILE_MEMORY_GLOBAL);
1824          break;
1825       case TGSI_MEMORY_TYPE_SHARED:
1826          sym->setFile(FILE_MEMORY_SHARED);
1827          break;
1828       case TGSI_MEMORY_TYPE_INPUT:
1829          assert(prog->getType() == Program::TYPE_COMPUTE);
1830          assert(idx == -1);
1831          sym->setFile(FILE_SHADER_INPUT);
1832          address += info->prop.cp.inputOffset;
1833          break;
1834       default:
1835          assert(0); /* TODO: Add support for global and private memory */
1836       }
1837    }
1838 
1839    if (idx >= 0) {
1840       if (sym->reg.file == FILE_SHADER_INPUT)
1841          sym->setOffset(info->in[idx].slot[c] * 4);
1842       else
1843       if (sym->reg.file == FILE_SHADER_OUTPUT)
1844          sym->setOffset(info->out[idx].slot[c] * 4);
1845       else
1846       if (sym->reg.file == FILE_SYSTEM_VALUE)
1847          sym->setSV(tgsi::translateSysVal(info->sv[idx].sn), c);
1848       else
1849          sym->setOffset(address);
1850    } else {
1851       sym->setOffset(address);
1852    }
1853    return sym;
1854 }
1855 
1856 uint8_t
translateInterpMode(const struct nv50_ir_varying * var,operation & op)1857 Converter::translateInterpMode(const struct nv50_ir_varying *var, operation& op)
1858 {
1859    uint8_t mode = NV50_IR_INTERP_PERSPECTIVE;
1860 
1861    if (var->flat)
1862       mode = NV50_IR_INTERP_FLAT;
1863    else
1864    if (var->linear)
1865       mode = NV50_IR_INTERP_LINEAR;
1866    else
1867    if (var->sc)
1868       mode = NV50_IR_INTERP_SC;
1869 
1870    op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC)
1871       ? OP_PINTERP : OP_LINTERP;
1872 
1873    if (var->centroid)
1874       mode |= NV50_IR_INTERP_CENTROID;
1875 
1876    return mode;
1877 }
1878 
1879 Value *
interpolate(tgsi::Instruction::SrcRegister src,int c,Value * ptr)1880 Converter::interpolate(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
1881 {
1882    operation op;
1883 
1884    // XXX: no way to know interpolation mode if we don't know what's accessed
1885    const uint8_t mode = translateInterpMode(&info->in[ptr ? 0 :
1886                                                       src.getIndex(0)], op);
1887 
1888    Instruction *insn = new_Instruction(func, op, TYPE_F32);
1889 
1890    insn->setDef(0, getScratch());
1891    insn->setSrc(0, srcToSym(src, c));
1892    if (op == OP_PINTERP)
1893       insn->setSrc(1, fragCoord[3]);
1894    if (ptr)
1895       insn->setIndirect(0, 0, ptr);
1896 
1897    insn->setInterpolate(mode);
1898 
1899    bb->insertTail(insn);
1900    return insn->getDef(0);
1901 }
1902 
1903 Value *
applySrcMod(Value * val,int s,int c)1904 Converter::applySrcMod(Value *val, int s, int c)
1905 {
1906    Modifier m = tgsi.getSrc(s).getMod(c);
1907    DataType ty = tgsi.inferSrcType();
1908 
1909    if (m & Modifier(NV50_IR_MOD_ABS))
1910       val = mkOp1v(OP_ABS, ty, getScratch(), val);
1911 
1912    if (m & Modifier(NV50_IR_MOD_NEG))
1913       val = mkOp1v(OP_NEG, ty, getScratch(), val);
1914 
1915    return val;
1916 }
1917 
1918 Value *
getVertexBase(int s)1919 Converter::getVertexBase(int s)
1920 {
1921    assert(s < 5);
1922    if (!(vtxBaseValid & (1 << s))) {
1923       const int index = tgsi.getSrc(s).getIndex(1);
1924       Value *rel = NULL;
1925       if (tgsi.getSrc(s).isIndirect(1))
1926          rel = fetchSrc(tgsi.getSrc(s).getIndirect(1), 0, NULL);
1927       vtxBaseValid |= 1 << s;
1928       vtxBase[s] = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
1929                           mkImm(index), rel);
1930    }
1931    return vtxBase[s];
1932 }
1933 
1934 Value *
getOutputBase(int s)1935 Converter::getOutputBase(int s)
1936 {
1937    assert(s < 5);
1938    if (!(vtxBaseValid & (1 << s))) {
1939       Value *offset = loadImm(NULL, tgsi.getSrc(s).getIndex(1));
1940       if (tgsi.getSrc(s).isIndirect(1))
1941          offset = mkOp2v(OP_ADD, TYPE_U32, getSSA(),
1942                          fetchSrc(tgsi.getSrc(s).getIndirect(1), 0, NULL),
1943                          offset);
1944       vtxBaseValid |= 1 << s;
1945       vtxBase[s] = mkOp2v(OP_ADD, TYPE_U32, getSSA(), outBase, offset);
1946    }
1947    return vtxBase[s];
1948 }
1949 
1950 Value *
fetchSrc(int s,int c)1951 Converter::fetchSrc(int s, int c)
1952 {
1953    Value *res;
1954    Value *ptr = NULL, *dimRel = NULL;
1955 
1956    tgsi::Instruction::SrcRegister src = tgsi.getSrc(s);
1957 
1958    if (src.isIndirect(0))
1959       ptr = fetchSrc(src.getIndirect(0), 0, NULL);
1960 
1961    if (src.is2D()) {
1962       switch (src.getFile()) {
1963       case TGSI_FILE_OUTPUT:
1964          dimRel = getOutputBase(s);
1965          break;
1966       case TGSI_FILE_INPUT:
1967          dimRel = getVertexBase(s);
1968          break;
1969       case TGSI_FILE_CONSTANT:
1970          // on NVC0, this is valid and c{I+J}[k] == cI[(J << 16) + k]
1971          if (src.isIndirect(1))
1972             dimRel = fetchSrc(src.getIndirect(1), 0, 0);
1973          break;
1974       default:
1975          break;
1976       }
1977    }
1978 
1979    res = fetchSrc(src, c, ptr);
1980 
1981    if (dimRel)
1982       res->getInsn()->setIndirect(0, 1, dimRel);
1983 
1984    return applySrcMod(res, s, c);
1985 }
1986 
1987 Value *
fetchDst(int d,int c)1988 Converter::fetchDst(int d, int c)
1989 {
1990    Value *res;
1991    Value *ptr = NULL, *dimRel = NULL;
1992 
1993    tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
1994 
1995    if (dst.isIndirect(0))
1996       ptr = fetchSrc(dst.getIndirect(0), 0, NULL);
1997 
1998    if (dst.is2D()) {
1999       switch (dst.getFile()) {
2000       case TGSI_FILE_OUTPUT:
2001          assert(0); // TODO
2002          dimRel = NULL;
2003          break;
2004       case TGSI_FILE_INPUT:
2005          assert(0); // TODO
2006          dimRel = NULL;
2007          break;
2008       case TGSI_FILE_CONSTANT:
2009          // on NVC0, this is valid and c{I+J}[k] == cI[(J << 16) + k]
2010          if (dst.isIndirect(1))
2011             dimRel = fetchSrc(dst.getIndirect(1), 0, 0);
2012          break;
2013       default:
2014          break;
2015       }
2016    }
2017 
2018    struct tgsi_full_src_register fsr = dst.asSrc();
2019    tgsi::Instruction::SrcRegister src(&fsr);
2020    res = fetchSrc(src, c, ptr);
2021 
2022    if (dimRel)
2023       res->getInsn()->setIndirect(0, 1, dimRel);
2024 
2025    return res;
2026 }
2027 
2028 Converter::DataArray *
getArrayForFile(unsigned file,int idx)2029 Converter::getArrayForFile(unsigned file, int idx)
2030 {
2031    switch (file) {
2032    case TGSI_FILE_TEMPORARY:
2033       return idx == 0 ? &tData : &lData;
2034    case TGSI_FILE_ADDRESS:
2035       return &aData;
2036    case TGSI_FILE_OUTPUT:
2037       assert(prog->getType() == Program::TYPE_FRAGMENT);
2038       return &oData;
2039    default:
2040       assert(!"invalid/unhandled TGSI source file");
2041       return NULL;
2042    }
2043 }
2044 
2045 Value *
shiftAddress(Value * index)2046 Converter::shiftAddress(Value *index)
2047 {
2048    if (!index)
2049       return NULL;
2050    return mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), index, mkImm(4));
2051 }
2052 
2053 void
adjustTempIndex(int arrayId,int & idx,int & idx2d) const2054 Converter::adjustTempIndex(int arrayId, int &idx, int &idx2d) const
2055 {
2056    std::map<int, int>::const_iterator it =
2057       code->indirectTempOffsets.find(arrayId);
2058    if (it == code->indirectTempOffsets.end())
2059       return;
2060 
2061    idx2d = 1;
2062    idx += it->second;
2063 }
2064 
2065 bool
isSubGroupMask(uint8_t semantic)2066 Converter::isSubGroupMask(uint8_t semantic)
2067 {
2068    switch (semantic) {
2069       case TGSI_SEMANTIC_SUBGROUP_EQ_MASK:
2070       case TGSI_SEMANTIC_SUBGROUP_LT_MASK:
2071       case TGSI_SEMANTIC_SUBGROUP_LE_MASK:
2072       case TGSI_SEMANTIC_SUBGROUP_GT_MASK:
2073       case TGSI_SEMANTIC_SUBGROUP_GE_MASK:
2074          return true;
2075       default:
2076          return false;
2077    }
2078 }
2079 
2080 Value *
fetchSrc(tgsi::Instruction::SrcRegister src,int c,Value * ptr)2081 Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
2082 {
2083    int idx2d = src.is2D() ? src.getIndex(1) : 0;
2084    int idx = src.getIndex(0);
2085    const int swz = src.getSwizzle(c);
2086    Instruction *ld;
2087 
2088    switch (src.getFile()) {
2089    case TGSI_FILE_IMMEDIATE:
2090       assert(!ptr);
2091       return loadImm(NULL, info->immd.data[idx * 4 + swz]);
2092    case TGSI_FILE_CONSTANT:
2093       return mkLoadv(TYPE_U32, srcToSym(src, c), shiftAddress(ptr));
2094    case TGSI_FILE_INPUT:
2095       if (prog->getType() == Program::TYPE_FRAGMENT) {
2096          // don't load masked inputs, won't be assigned a slot
2097          if (!ptr && !(info->in[idx].mask & (1 << swz)))
2098             return loadImm(NULL, swz == TGSI_SWIZZLE_W ? 1.0f : 0.0f);
2099          return interpolate(src, c, shiftAddress(ptr));
2100       } else
2101       if (prog->getType() == Program::TYPE_GEOMETRY) {
2102          if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_PRIMID)
2103             return mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_PRIMITIVE_ID, 0));
2104          // XXX: This is going to be a problem with scalar arrays, i.e. when
2105          // we cannot assume that the address is given in units of vec4.
2106          //
2107          // nv50 and nvc0 need different things here, so let the lowering
2108          // passes decide what to do with the address
2109          if (ptr)
2110             return mkLoadv(TYPE_U32, srcToSym(src, c), ptr);
2111       }
2112       ld = mkLoad(TYPE_U32, getSSA(), srcToSym(src, c), shiftAddress(ptr));
2113       ld->perPatch = info->in[idx].patch;
2114       return ld->getDef(0);
2115    case TGSI_FILE_OUTPUT:
2116       assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL);
2117       ld = mkLoad(TYPE_U32, getSSA(), srcToSym(src, c), shiftAddress(ptr));
2118       ld->perPatch = info->out[idx].patch;
2119       return ld->getDef(0);
2120    case TGSI_FILE_SYSTEM_VALUE:
2121       assert(!ptr);
2122       if (info->sv[idx].sn == TGSI_SEMANTIC_THREAD_ID &&
2123           info->prop.cp.numThreads[swz] == 1)
2124          return loadImm(NULL, 0u);
2125       if (isSubGroupMask(info->sv[idx].sn) && swz > 0)
2126          return loadImm(NULL, 0u);
2127       if (info->sv[idx].sn == TGSI_SEMANTIC_SUBGROUP_SIZE)
2128          return loadImm(NULL, 32u);
2129       ld = mkOp1(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c));
2130       ld->perPatch = info->sv[idx].patch;
2131       return ld->getDef(0);
2132    case TGSI_FILE_TEMPORARY: {
2133       int arrayid = src.getArrayId();
2134       if (!arrayid)
2135          arrayid = code->tempArrayId[idx];
2136       adjustTempIndex(arrayid, idx, idx2d);
2137    }
2138       /* fallthrough */
2139    default:
2140       return getArrayForFile(src.getFile(), idx2d)->load(
2141          sub.cur->values, idx, swz, shiftAddress(ptr));
2142    }
2143 }
2144 
2145 Value *
acquireDst(int d,int c)2146 Converter::acquireDst(int d, int c)
2147 {
2148    const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
2149    const unsigned f = dst.getFile();
2150    int idx = dst.getIndex(0);
2151    int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
2152 
2153    if (dst.isMasked(c) || f == TGSI_FILE_BUFFER || f == TGSI_FILE_MEMORY ||
2154        f == TGSI_FILE_IMAGE)
2155       return NULL;
2156 
2157    if (dst.isIndirect(0) ||
2158        f == TGSI_FILE_SYSTEM_VALUE ||
2159        (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT))
2160       return getScratch();
2161 
2162    if (f == TGSI_FILE_TEMPORARY) {
2163       int arrayid = dst.getArrayId();
2164       if (!arrayid)
2165          arrayid = code->tempArrayId[idx];
2166       adjustTempIndex(arrayid, idx, idx2d);
2167    }
2168 
2169    return getArrayForFile(f, idx2d)-> acquire(sub.cur->values, idx, c);
2170 }
2171 
2172 void
storeDst(int d,int c,Value * val)2173 Converter::storeDst(int d, int c, Value *val)
2174 {
2175    const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
2176 
2177    if (tgsi.getSaturate()) {
2178       mkOp1(OP_SAT, dstTy, val, val);
2179    }
2180 
2181    Value *ptr = NULL;
2182    if (dst.isIndirect(0))
2183       ptr = shiftAddress(fetchSrc(dst.getIndirect(0), 0, NULL));
2184 
2185    if (info->io.genUserClip > 0 &&
2186        dst.getFile() == TGSI_FILE_OUTPUT &&
2187        !dst.isIndirect(0) && dst.getIndex(0) == code->clipVertexOutput) {
2188       mkMov(clipVtx[c], val);
2189       val = clipVtx[c];
2190    }
2191 
2192    storeDst(dst, c, val, ptr);
2193 }
2194 
2195 void
storeDst(const tgsi::Instruction::DstRegister dst,int c,Value * val,Value * ptr)2196 Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c,
2197                     Value *val, Value *ptr)
2198 {
2199    const unsigned f = dst.getFile();
2200    int idx = dst.getIndex(0);
2201    int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
2202 
2203    if (f == TGSI_FILE_SYSTEM_VALUE) {
2204       assert(!ptr);
2205       mkOp2(OP_WRSV, TYPE_U32, NULL, dstToSym(dst, c), val);
2206    } else
2207    if (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT) {
2208 
2209       if (ptr || (info->out[idx].mask & (1 << c))) {
2210          /* Save the viewport index into a scratch register so that it can be
2211             exported at EMIT time */
2212          if (info->out[idx].sn == TGSI_SEMANTIC_VIEWPORT_INDEX &&
2213              prog->getType() == Program::TYPE_GEOMETRY &&
2214              viewport != NULL)
2215             mkOp1(OP_MOV, TYPE_U32, viewport, val);
2216          else
2217             mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val)->perPatch =
2218                info->out[idx].patch;
2219       }
2220    } else
2221    if (f == TGSI_FILE_TEMPORARY ||
2222        f == TGSI_FILE_ADDRESS ||
2223        f == TGSI_FILE_OUTPUT) {
2224       if (f == TGSI_FILE_TEMPORARY) {
2225          int arrayid = dst.getArrayId();
2226          if (!arrayid)
2227             arrayid = code->tempArrayId[idx];
2228          adjustTempIndex(arrayid, idx, idx2d);
2229       }
2230 
2231       getArrayForFile(f, idx2d)->store(sub.cur->values, idx, c, ptr, val);
2232    } else {
2233       assert(!"invalid dst file");
2234    }
2235 }
2236 
// Iterate 'chan' over 0..3 and execute the statement that follows only for
// the channels that destination 'd' of 'inst' does not mask out.
//
// The arguments are parenthesized so that expressions such as '*p' can be
// passed for 'inst' without operator precedence surprises.
// NOTE: this expands to a 'for' with an unbraced 'if'; do not follow the
// controlled statement with an 'else'.
#define FOR_EACH_DST_ENABLED_CHANNEL(d, chan, inst) \
   for ((chan) = 0; (chan) < 4; ++(chan))           \
      if (!(inst).getDst(d).isMasked(chan))
2240 
2241 Value *
buildDot(int dim)2242 Converter::buildDot(int dim)
2243 {
2244    assert(dim > 0);
2245 
2246    Value *src0 = fetchSrc(0, 0), *src1 = fetchSrc(1, 0);
2247    Value *dotp = getScratch();
2248 
2249    mkOp2(OP_MUL, TYPE_F32, dotp, src0, src1)
2250       ->dnz = info->io.mul_zero_wins;
2251 
2252    for (int c = 1; c < dim; ++c) {
2253       src0 = fetchSrc(0, c);
2254       src1 = fetchSrc(1, c);
2255       mkOp3(OP_MAD, TYPE_F32, dotp, src0, src1, dotp)
2256          ->dnz = info->io.mul_zero_wins;
2257    }
2258    return dotp;
2259 }
2260 
2261 void
insertConvergenceOps(BasicBlock * conv,BasicBlock * fork)2262 Converter::insertConvergenceOps(BasicBlock *conv, BasicBlock *fork)
2263 {
2264    FlowInstruction *join = new_FlowInstruction(func, OP_JOIN, NULL);
2265    join->fixed = 1;
2266    conv->insertHead(join);
2267 
2268    assert(!fork->joinAt);
2269    fork->joinAt = new_FlowInstruction(func, OP_JOINAT, conv);
2270    fork->insertBefore(fork->getExit(), fork->joinAt);
2271 }
2272 
2273 void
setTexRS(TexInstruction * tex,unsigned int & s,int R,int S)2274 Converter::setTexRS(TexInstruction *tex, unsigned int& s, int R, int S)
2275 {
2276    unsigned rIdx = 0, sIdx = 0;
2277 
2278    if (R >= 0 && tgsi.getSrc(R).getFile() != TGSI_FILE_SAMPLER) {
2279       // This is the bindless case. We have to get the actual value and pass
2280       // it in. This will be the complete handle.
2281       tex->tex.rIndirectSrc = s;
2282       tex->setSrc(s++, fetchSrc(R, 0));
2283       tex->setTexture(tgsi.getTexture(code, R), 0xff, 0x1f);
2284       tex->tex.bindless = true;
2285       return;
2286    }
2287 
2288    if (R >= 0)
2289       rIdx = tgsi.getSrc(R).getIndex(0);
2290    if (S >= 0)
2291       sIdx = tgsi.getSrc(S).getIndex(0);
2292 
2293    tex->setTexture(tgsi.getTexture(code, R), rIdx, sIdx);
2294 
2295    if (tgsi.getSrc(R).isIndirect(0)) {
2296       tex->tex.rIndirectSrc = s;
2297       tex->setSrc(s++, fetchSrc(tgsi.getSrc(R).getIndirect(0), 0, NULL));
2298    }
2299    if (S >= 0 && tgsi.getSrc(S).isIndirect(0)) {
2300       tex->tex.sIndirectSrc = s;
2301       tex->setSrc(s++, fetchSrc(tgsi.getSrc(S).getIndirect(0), 0, NULL));
2302    }
2303 }
2304 
2305 void
handleTXQ(Value * dst0[4],enum TexQuery query,int R)2306 Converter::handleTXQ(Value *dst0[4], enum TexQuery query, int R)
2307 {
2308    TexInstruction *tex = new_TexInstruction(func, OP_TXQ);
2309    tex->tex.query = query;
2310    unsigned int c, d;
2311 
2312    for (d = 0, c = 0; c < 4; ++c) {
2313       if (!dst0[c])
2314          continue;
2315       tex->tex.mask |= 1 << c;
2316       tex->setDef(d++, dst0[c]);
2317    }
2318    if (query == TXQ_DIMS)
2319       tex->setSrc((c = 0), fetchSrc(0, 0)); // mip level
2320    else
2321       tex->setSrc((c = 0), zero);
2322 
2323    setTexRS(tex, ++c, R, -1);
2324 
2325    bb->insertTail(tex);
2326 }
2327 
2328 void
loadProjTexCoords(Value * dst[4],Value * src[4],unsigned int mask)2329 Converter::loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask)
2330 {
2331    Value *proj = fetchSrc(0, 3);
2332    Instruction *insn = proj->getUniqueInsn();
2333    int c;
2334 
2335    if (insn->op == OP_PINTERP) {
2336       bb->insertTail(insn = cloneForward(func, insn));
2337       insn->op = OP_LINTERP;
2338       insn->setInterpolate(NV50_IR_INTERP_LINEAR | insn->getSampleMode());
2339       insn->setSrc(1, NULL);
2340       proj = insn->getDef(0);
2341    }
2342    proj = mkOp1v(OP_RCP, TYPE_F32, getSSA(), proj);
2343 
2344    for (c = 0; c < 4; ++c) {
2345       if (!(mask & (1 << c)))
2346          continue;
2347       if ((insn = src[c]->getUniqueInsn())->op != OP_PINTERP)
2348          continue;
2349       mask &= ~(1 << c);
2350 
2351       bb->insertTail(insn = cloneForward(func, insn));
2352       insn->setInterpolate(NV50_IR_INTERP_PERSPECTIVE | insn->getSampleMode());
2353       insn->setSrc(1, proj);
2354       dst[c] = insn->getDef(0);
2355    }
2356    if (!mask)
2357       return;
2358 
2359    proj = mkOp1v(OP_RCP, TYPE_F32, getSSA(), fetchSrc(0, 3));
2360 
2361    for (c = 0; c < 4; ++c)
2362       if (mask & (1 << c))
2363          dst[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), src[c], proj);
2364 }
2365 
2366 // order of nv50 ir sources: x y z layer lod/bias shadow
2367 // order of TGSI TEX sources: x y z layer shadow lod/bias
2368 //  lowering will finally set the hw specific order (like array first on nvc0)
2369 void
handleTEX(Value * dst[4],int R,int S,int L,int C,int Dx,int Dy)2370 Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy)
2371 {
2372    Value *arg[4], *src[8];
2373    Value *lod = NULL, *shd = NULL;
2374    unsigned int s, c, d;
2375    TexInstruction *texi = new_TexInstruction(func, tgsi.getOP());
2376 
2377    TexInstruction::Target tgt = tgsi.getTexture(code, R);
2378 
2379    for (s = 0; s < tgt.getArgCount(); ++s)
2380       arg[s] = src[s] = fetchSrc(0, s);
2381 
2382    if (tgsi.getOpcode() == TGSI_OPCODE_TEX_LZ)
2383       lod = loadImm(NULL, 0);
2384    else if (texi->op == OP_TXL || texi->op == OP_TXB)
2385       lod = fetchSrc(L >> 4, L & 3);
2386 
2387    if (C == 0x0f)
2388       C = 0x00 | MAX2(tgt.getArgCount(), 2); // guess DC src
2389 
2390    if (tgsi.getOpcode() == TGSI_OPCODE_TG4 &&
2391        tgt == TEX_TARGET_CUBE_ARRAY_SHADOW)
2392       shd = fetchSrc(1, 0);
2393    else if (tgt.isShadow())
2394       shd = fetchSrc(C >> 4, C & 3);
2395 
2396    if (texi->op == OP_TXD) {
2397       for (c = 0; c < tgt.getDim() + tgt.isCube(); ++c) {
2398          texi->dPdx[c].set(fetchSrc(Dx >> 4, (Dx & 3) + c));
2399          texi->dPdy[c].set(fetchSrc(Dy >> 4, (Dy & 3) + c));
2400       }
2401    }
2402 
2403    // cube textures don't care about projection value, it's divided out
2404    if (tgsi.getOpcode() == TGSI_OPCODE_TXP && !tgt.isCube() && !tgt.isArray()) {
2405       unsigned int n = tgt.getDim();
2406       if (shd) {
2407          arg[n] = shd;
2408          ++n;
2409          assert(tgt.getDim() == tgt.getArgCount());
2410       }
2411       loadProjTexCoords(src, arg, (1 << n) - 1);
2412       if (shd)
2413          shd = src[n - 1];
2414    }
2415 
2416    for (c = 0, d = 0; c < 4; ++c) {
2417       if (dst[c]) {
2418          texi->setDef(d++, dst[c]);
2419          texi->tex.mask |= 1 << c;
2420       } else {
2421          // NOTE: maybe hook up def too, for CSE
2422       }
2423    }
2424    for (s = 0; s < tgt.getArgCount(); ++s)
2425       texi->setSrc(s, src[s]);
2426    if (lod)
2427       texi->setSrc(s++, lod);
2428    if (shd)
2429       texi->setSrc(s++, shd);
2430 
2431    setTexRS(texi, s, R, S);
2432 
2433    if (tgsi.getOpcode() == TGSI_OPCODE_SAMPLE_C_LZ)
2434       texi->tex.levelZero = true;
2435    if (prog->getType() != Program::TYPE_FRAGMENT &&
2436        (tgsi.getOpcode() == TGSI_OPCODE_TEX ||
2437         tgsi.getOpcode() == TGSI_OPCODE_TEX2 ||
2438         tgsi.getOpcode() == TGSI_OPCODE_TXP))
2439       texi->tex.levelZero = true;
2440    if (tgsi.getOpcode() == TGSI_OPCODE_TG4 && !tgt.isShadow())
2441       texi->tex.gatherComp = tgsi.getSrc(1).getValueU32(0, info);
2442 
2443    texi->tex.useOffsets = tgsi.getNumTexOffsets();
2444    for (s = 0; s < tgsi.getNumTexOffsets(); ++s) {
2445       for (c = 0; c < 3; ++c) {
2446          texi->offset[s][c].set(fetchSrc(tgsi.getTexOffset(s), c, NULL));
2447          texi->offset[s][c].setInsn(texi);
2448       }
2449    }
2450 
2451    bb->insertTail(texi);
2452 }
2453 
2454 // 1st source: xyz = coordinates, w = lod/sample
2455 // 2nd source: offset
2456 void
handleTXF(Value * dst[4],int R,int L_M)2457 Converter::handleTXF(Value *dst[4], int R, int L_M)
2458 {
2459    TexInstruction *texi = new_TexInstruction(func, tgsi.getOP());
2460    int ms;
2461    unsigned int c, d, s;
2462 
2463    texi->tex.target = tgsi.getTexture(code, R);
2464 
2465    ms = texi->tex.target.isMS() ? 1 : 0;
2466    texi->tex.levelZero = ms; /* MS textures don't have mip-maps */
2467 
2468    for (c = 0, d = 0; c < 4; ++c) {
2469       if (dst[c]) {
2470          texi->setDef(d++, dst[c]);
2471          texi->tex.mask |= 1 << c;
2472       }
2473    }
2474    for (c = 0; c < (texi->tex.target.getArgCount() - ms); ++c)
2475       texi->setSrc(c, fetchSrc(0, c));
2476    if (!ms && tgsi.getOpcode() == TGSI_OPCODE_TXF_LZ)
2477       texi->setSrc(c++, loadImm(NULL, 0));
2478    else
2479       texi->setSrc(c++, fetchSrc(L_M >> 4, L_M & 3)); // lod or ms
2480 
2481    setTexRS(texi, c, R, -1);
2482 
2483    texi->tex.useOffsets = tgsi.getNumTexOffsets();
2484    for (s = 0; s < tgsi.getNumTexOffsets(); ++s) {
2485       for (c = 0; c < 3; ++c) {
2486          texi->offset[s][c].set(fetchSrc(tgsi.getTexOffset(s), c, NULL));
2487          texi->offset[s][c].setInsn(texi);
2488       }
2489    }
2490 
2491    bb->insertTail(texi);
2492 }
2493 
2494 void
handleFBFETCH(Value * dst[4])2495 Converter::handleFBFETCH(Value *dst[4])
2496 {
2497    TexInstruction *texi = new_TexInstruction(func, OP_TXF);
2498    unsigned int c, d;
2499 
2500    texi->tex.target = TEX_TARGET_2D_MS_ARRAY;
2501    texi->tex.levelZero = 1;
2502    texi->tex.useOffsets = 0;
2503 
2504    for (c = 0, d = 0; c < 4; ++c) {
2505       if (dst[c]) {
2506          texi->setDef(d++, dst[c]);
2507          texi->tex.mask |= 1 << c;
2508       }
2509    }
2510 
2511    Value *x = mkOp1v(OP_RDSV, TYPE_F32, getScratch(), mkSysVal(SV_POSITION, 0));
2512    Value *y = mkOp1v(OP_RDSV, TYPE_F32, getScratch(), mkSysVal(SV_POSITION, 1));
2513    Value *z = mkOp1v(OP_RDSV, TYPE_U32, getScratch(), mkSysVal(SV_LAYER, 0));
2514    Value *ms = mkOp1v(OP_RDSV, TYPE_U32, getScratch(), mkSysVal(SV_SAMPLE_INDEX, 0));
2515 
2516    mkCvt(OP_CVT, TYPE_U32, x, TYPE_F32, x)->rnd = ROUND_Z;
2517    mkCvt(OP_CVT, TYPE_U32, y, TYPE_F32, y)->rnd = ROUND_Z;
2518    texi->setSrc(0, x);
2519    texi->setSrc(1, y);
2520    texi->setSrc(2, z);
2521    texi->setSrc(3, ms);
2522 
2523    texi->tex.r = texi->tex.s = -1;
2524 
2525    bb->insertTail(texi);
2526 }
2527 
2528 void
handleLIT(Value * dst0[4])2529 Converter::handleLIT(Value *dst0[4])
2530 {
2531    Value *val0 = NULL;
2532    unsigned int mask = tgsi.getDst(0).getMask();
2533 
2534    if (mask & (1 << 0))
2535       loadImm(dst0[0], 1.0f);
2536 
2537    if (mask & (1 << 3))
2538       loadImm(dst0[3], 1.0f);
2539 
2540    if (mask & (3 << 1)) {
2541       val0 = getScratch();
2542       mkOp2(OP_MAX, TYPE_F32, val0, fetchSrc(0, 0), zero);
2543       if (mask & (1 << 1))
2544          mkMov(dst0[1], val0);
2545    }
2546 
2547    if (mask & (1 << 2)) {
2548       Value *src1 = fetchSrc(0, 1), *src3 = fetchSrc(0, 3);
2549       Value *val1 = getScratch(), *val3 = getScratch();
2550 
2551       Value *pos128 = loadImm(NULL, +127.999999f);
2552       Value *neg128 = loadImm(NULL, -127.999999f);
2553 
2554       mkOp2(OP_MAX, TYPE_F32, val1, src1, zero);
2555       mkOp2(OP_MAX, TYPE_F32, val3, src3, neg128);
2556       mkOp2(OP_MIN, TYPE_F32, val3, val3, pos128);
2557       mkOp2(OP_POW, TYPE_F32, val3, val1, val3);
2558 
2559       mkCmp(OP_SLCT, CC_GT, TYPE_F32, dst0[2], TYPE_F32, val3, zero, val0);
2560    }
2561 }
2562 
2563 /* Keep this around for now as reference when adding img support
2564 static inline bool
2565 isResourceSpecial(const int r)
2566 {
2567    return (r == TGSI_RESOURCE_GLOBAL ||
2568            r == TGSI_RESOURCE_LOCAL ||
2569            r == TGSI_RESOURCE_PRIVATE ||
2570            r == TGSI_RESOURCE_INPUT);
2571 }
2572 
2573 static inline bool
2574 isResourceRaw(const tgsi::Source *code, const int r)
2575 {
2576    return isResourceSpecial(r) || code->resources[r].raw;
2577 }
2578 
2579 static inline nv50_ir::TexTarget
2580 getResourceTarget(const tgsi::Source *code, int r)
2581 {
2582    if (isResourceSpecial(r))
2583       return nv50_ir::TEX_TARGET_BUFFER;
2584    return tgsi::translateTexture(code->resources.at(r).target);
2585 }
2586 
2587 Symbol *
2588 Converter::getResourceBase(const int r)
2589 {
2590    Symbol *sym = NULL;
2591 
2592    switch (r) {
2593    case TGSI_RESOURCE_GLOBAL:
2594       sym = new_Symbol(prog, nv50_ir::FILE_MEMORY_GLOBAL,
2595                        info->io.auxCBSlot);
2596       break;
2597    case TGSI_RESOURCE_LOCAL:
2598       assert(prog->getType() == Program::TYPE_COMPUTE);
2599       sym = mkSymbol(nv50_ir::FILE_MEMORY_SHARED, 0, TYPE_U32,
2600                      info->prop.cp.sharedOffset);
2601       break;
2602    case TGSI_RESOURCE_PRIVATE:
2603       sym = mkSymbol(nv50_ir::FILE_MEMORY_LOCAL, 0, TYPE_U32,
2604                      info->bin.tlsSpace);
2605       break;
2606    case TGSI_RESOURCE_INPUT:
2607       assert(prog->getType() == Program::TYPE_COMPUTE);
2608       sym = mkSymbol(nv50_ir::FILE_SHADER_INPUT, 0, TYPE_U32,
2609                      info->prop.cp.inputOffset);
2610       break;
2611    default:
2612       sym = new_Symbol(prog,
2613                        nv50_ir::FILE_MEMORY_GLOBAL, code->resources.at(r).slot);
2614       break;
2615    }
2616    return sym;
2617 }
2618 
2619 void
2620 Converter::getResourceCoords(std::vector<Value *> &coords, int r, int s)
2621 {
2622    const int arg =
2623       TexInstruction::Target(getResourceTarget(code, r)).getArgCount();
2624 
2625    for (int c = 0; c < arg; ++c)
2626       coords.push_back(fetchSrc(s, c));
2627 
2628    // NOTE: TGSI_RESOURCE_GLOBAL needs FILE_GPR; this is an nv50 quirk
2629    if (r == TGSI_RESOURCE_LOCAL ||
2630        r == TGSI_RESOURCE_PRIVATE ||
2631        r == TGSI_RESOURCE_INPUT)
2632       coords[0] = mkOp1v(OP_MOV, TYPE_U32, getScratch(4, FILE_ADDRESS),
2633                          coords[0]);
2634 }
2635 */
/* Split a 4-bit component mask into at most two runs of consecutive
 * components.
 *
 * comp: out, first component of each run; must be zero-initialized
 * size: out, number of components of each run; must be zero-initialized
 * mask: component mask, bit c set means component c is accessed; only
 *       masks with at most two runs (any 4-bit mask) are supported
 *
 * Returns the number of runs written (1 or 2; also 1 for an empty mask).
 *
 * A single run of exactly 3 components is split in two, (2 + 1) when it
 * starts at component 0 and (1 + 2) when it starts at component 1.
 * NOTE(review): presumably because a 3-component access is not available
 * and the 2-component part has to stay aligned - confirm against callers.
 */
static inline int
partitionLoadStore(uint8_t comp[2], uint8_t size[2], uint8_t mask)
{
   int n = 0;

   while (mask) {
      if (mask & 1) {
         size[n]++;
      } else {
         if (size[n]) {
            // Gap after the first run: the second run starts at the earliest
            // one component past the end of the first run.  The start of
            // the first run (comp[0]) has to be included, otherwise a mask
            // like 0b1010 yields comp[1] == 2 instead of 3.
            comp[n = 1] = comp[0] + size[0] + 1;
         } else {
            comp[n]++;
         }
      }
      mask >>= 1;
   }
   if (size[0] == 3) {
      n = 1;
      size[0] = (comp[0] == 1) ? 1 : 2;
      size[1] = 3 - size[0];
      comp[1] = comp[0] + size[0];
   }
   return n + 1;
}
2660 
2661 void
getImageCoords(std::vector<Value * > & coords,int s)2662 Converter::getImageCoords(std::vector<Value *> &coords, int s)
2663 {
2664    TexInstruction::Target t =
2665       TexInstruction::Target(tgsi.getImageTarget());
2666    const int arg = t.getDim() + (t.isArray() || t.isCube());
2667 
2668    for (int c = 0; c < arg; ++c)
2669       coords.push_back(fetchSrc(s, c));
2670 
2671    if (t.isMS())
2672       coords.push_back(fetchSrc(s, 3));
2673 }
2674 
2675 // For raw loads, granularity is 4 byte.
2676 // Usage of the texture read mask on OP_SULDP is not allowed.
2677 void
handleLOAD(Value * dst0[4])2678 Converter::handleLOAD(Value *dst0[4])
2679 {
2680    const int r = tgsi.getSrc(0).getIndex(0);
2681    int c;
2682    std::vector<Value *> off, src, ldv, def;
2683    Value *ind = NULL;
2684 
2685    if (tgsi.getSrc(0).isIndirect(0))
2686       ind = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);
2687 
2688    switch (tgsi.getSrc(0).getFile()) {
2689    case TGSI_FILE_BUFFER:
2690    case TGSI_FILE_MEMORY:
2691       for (c = 0; c < 4; ++c) {
2692          if (!dst0[c])
2693             continue;
2694 
2695          Value *off;
2696          Symbol *sym;
2697          uint32_t src0_component_offset = tgsi.getSrc(0).getSwizzle(c) * 4;
2698 
2699          if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE) {
2700             off = NULL;
2701             sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c,
2702                           tgsi.getSrc(1).getValueU32(0, info) +
2703                           src0_component_offset);
2704          } else {
2705             // yzw are ignored for buffers
2706             off = fetchSrc(1, 0);
2707             sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c,
2708                           src0_component_offset);
2709          }
2710 
2711          Instruction *ld = mkLoad(TYPE_U32, dst0[c], sym, off);
2712          ld->cache = tgsi.getCacheMode();
2713          if (ind)
2714             ld->setIndirect(0, 1, ind);
2715       }
2716       break;
2717    default: {
2718       getImageCoords(off, 1);
2719       def.resize(4);
2720 
2721       for (c = 0; c < 4; ++c) {
2722          if (!dst0[c] || tgsi.getSrc(0).getSwizzle(c) != (TGSI_SWIZZLE_X + c))
2723             def[c] = getScratch();
2724          else
2725             def[c] = dst0[c];
2726       }
2727 
2728       bool bindless = tgsi.getSrc(0).getFile() != TGSI_FILE_IMAGE;
2729       if (bindless)
2730          ind = fetchSrc(0, 0);
2731 
2732       TexInstruction *ld =
2733          mkTex(OP_SULDP, tgsi.getImageTarget(), 0, 0, def, off);
2734       ld->tex.mask = tgsi.getDst(0).getMask();
2735       ld->tex.format = tgsi.getImageFormat();
2736       ld->cache = tgsi.getCacheMode();
2737       ld->tex.bindless = bindless;
2738       if (!bindless)
2739          ld->tex.r = r;
2740       if (ind)
2741          ld->setIndirectR(ind);
2742 
2743       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2744          if (dst0[c] != def[c])
2745             mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]);
2746       break;
2747    }
2748    }
2749 
2750 
2751 /* Keep this around for now as reference when adding img support
2752    getResourceCoords(off, r, 1);
2753 
2754    if (isResourceRaw(code, r)) {
2755       uint8_t mask = 0;
2756       uint8_t comp[2] = { 0, 0 };
2757       uint8_t size[2] = { 0, 0 };
2758 
2759       Symbol *base = getResourceBase(r);
2760 
2761       // determine the base and size of the at most 2 load ops
2762       for (c = 0; c < 4; ++c)
2763          if (!tgsi.getDst(0).isMasked(c))
2764             mask |= 1 << (tgsi.getSrc(0).getSwizzle(c) - TGSI_SWIZZLE_X);
2765 
2766       int n = partitionLoadStore(comp, size, mask);
2767 
2768       src = off;
2769 
2770       def.resize(4); // index by component, the ones we need will be non-NULL
2771       for (c = 0; c < 4; ++c) {
2772          if (dst0[c] && tgsi.getSrc(0).getSwizzle(c) == (TGSI_SWIZZLE_X + c))
2773             def[c] = dst0[c];
2774          else
2775          if (mask & (1 << c))
2776             def[c] = getScratch();
2777       }
2778 
2779       const bool useLd = isResourceSpecial(r) ||
2780          (info->io.nv50styleSurfaces &&
2781           code->resources[r].target == TGSI_TEXTURE_BUFFER);
2782 
2783       for (int i = 0; i < n; ++i) {
2784          ldv.assign(def.begin() + comp[i], def.begin() + comp[i] + size[i]);
2785 
2786          if (comp[i]) // adjust x component of source address if necessary
2787             src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file),
2788                             off[0], mkImm(comp[i] * 4));
2789          else
2790             src[0] = off[0];
2791 
2792          if (useLd) {
2793             Instruction *ld =
2794                mkLoad(typeOfSize(size[i] * 4), ldv[0], base, src[0]);
2795             for (size_t c = 1; c < ldv.size(); ++c)
2796                ld->setDef(c, ldv[c]);
2797          } else {
2798             mkTex(OP_SULDB, getResourceTarget(code, r), code->resources[r].slot,
2799                   0, ldv, src)->dType = typeOfSize(size[i] * 4);
2800          }
2801       }
2802    } else {
2803       def.resize(4);
2804       for (c = 0; c < 4; ++c) {
2805          if (!dst0[c] || tgsi.getSrc(0).getSwizzle(c) != (TGSI_SWIZZLE_X + c))
2806             def[c] = getScratch();
2807          else
2808             def[c] = dst0[c];
2809       }
2810 
2811       mkTex(OP_SULDP, getResourceTarget(code, r), code->resources[r].slot, 0,
2812             def, off);
2813    }
2814    FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2815       if (dst0[c] != def[c])
2816          mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]);
2817 */
2818 }
2819 
2820 // For formatted stores, the write mask on OP_SUSTP can be used.
2821 // Raw stores have to be split.
2822 void
handleSTORE()2823 Converter::handleSTORE()
2824 {
2825    const int r = tgsi.getDst(0).getIndex(0);
2826    int c;
2827    std::vector<Value *> off, src, dummy;
2828    Value *ind = NULL;
2829 
2830    if (tgsi.getDst(0).isIndirect(0))
2831       ind = fetchSrc(tgsi.getDst(0).getIndirect(0), 0, 0);
2832 
2833    switch (tgsi.getDst(0).getFile()) {
2834    case TGSI_FILE_BUFFER:
2835    case TGSI_FILE_MEMORY:
2836       for (c = 0; c < 4; ++c) {
2837          if (!(tgsi.getDst(0).getMask() & (1 << c)))
2838             continue;
2839 
2840          Symbol *sym;
2841          Value *off;
2842          if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMMEDIATE) {
2843             off = NULL;
2844             sym = makeSym(tgsi.getDst(0).getFile(), r, -1, c,
2845                           tgsi.getSrc(0).getValueU32(0, info) + 4 * c);
2846          } else {
2847             // yzw are ignored for buffers
2848             off = fetchSrc(0, 0);
2849             sym = makeSym(tgsi.getDst(0).getFile(), r, -1, c, 4 * c);
2850          }
2851 
2852          Instruction *st = mkStore(OP_STORE, TYPE_U32, sym, off, fetchSrc(1, c));
2853          st->cache = tgsi.getCacheMode();
2854          if (ind)
2855             st->setIndirect(0, 1, ind);
2856       }
2857       break;
2858    default: {
2859       getImageCoords(off, 0);
2860       src = off;
2861 
2862       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2863          src.push_back(fetchSrc(1, c));
2864 
2865       bool bindless = tgsi.getDst(0).getFile() != TGSI_FILE_IMAGE;
2866       if (bindless)
2867          ind = fetchDst(0, 0);
2868 
2869       TexInstruction *st =
2870          mkTex(OP_SUSTP, tgsi.getImageTarget(), 0, 0, dummy, src);
2871       st->tex.mask = tgsi.getDst(0).getMask();
2872       st->tex.format = tgsi.getImageFormat();
2873       st->cache = tgsi.getCacheMode();
2874       st->tex.bindless = bindless;
2875       if (!bindless)
2876          st->tex.r = r;
2877       if (ind)
2878          st->setIndirectR(ind);
2879 
2880       break;
2881    }
2882    }
2883 
2884 /* Keep this around for now as reference when adding img support
2885    getResourceCoords(off, r, 0);
2886    src = off;
2887    const int s = src.size();
2888 
2889    if (isResourceRaw(code, r)) {
2890       uint8_t comp[2] = { 0, 0 };
2891       uint8_t size[2] = { 0, 0 };
2892 
2893       int n = partitionLoadStore(comp, size, tgsi.getDst(0).getMask());
2894 
2895       Symbol *base = getResourceBase(r);
2896 
2897       const bool useSt = isResourceSpecial(r) ||
2898          (info->io.nv50styleSurfaces &&
2899           code->resources[r].target == TGSI_TEXTURE_BUFFER);
2900 
2901       for (int i = 0; i < n; ++i) {
2902          if (comp[i]) // adjust x component of source address if necessary
2903             src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file),
2904                             off[0], mkImm(comp[i] * 4));
2905          else
2906             src[0] = off[0];
2907 
2908          const DataType stTy = typeOfSize(size[i] * 4);
2909 
2910          if (useSt) {
2911             Instruction *st =
2912                mkStore(OP_STORE, stTy, base, NULL, fetchSrc(1, comp[i]));
2913             for (c = 1; c < size[i]; ++c)
2914                st->setSrc(1 + c, fetchSrc(1, comp[i] + c));
2915             st->setIndirect(0, 0, src[0]);
2916          } else {
2917             // attach values to be stored
2918             src.resize(s + size[i]);
2919             for (c = 0; c < size[i]; ++c)
2920                src[s + c] = fetchSrc(1, comp[i] + c);
2921             mkTex(OP_SUSTB, getResourceTarget(code, r), code->resources[r].slot,
2922                   0, dummy, src)->setType(stTy);
2923          }
2924       }
2925    } else {
2926       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
2927          src.push_back(fetchSrc(1, c));
2928 
2929       mkTex(OP_SUSTP, getResourceTarget(code, r), code->resources[r].slot, 0,
2930             dummy, src)->tex.mask = tgsi.getDst(0).getMask();
2931    }
2932 */
2933 }
2934 
2935 // XXX: These only work on resources with the single-component u32/s32 formats.
2936 // Therefore the result is replicated. This might not be intended by TGSI, but
2937 // operating on more than 1 component would produce undefined results because
2938 // they do not exist.
2939 void
handleATOM(Value * dst0[4],DataType ty,uint16_t subOp)2940 Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
2941 {
2942    const int r = tgsi.getSrc(0).getIndex(0);
2943    std::vector<Value *> srcv;
2944    std::vector<Value *> defv;
2945    LValue *dst = getScratch();
2946    Value *ind = NULL;
2947 
2948    if (tgsi.getSrc(0).isIndirect(0))
2949       ind = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);
2950 
2951    switch (tgsi.getSrc(0).getFile()) {
2952    case TGSI_FILE_BUFFER:
2953    case TGSI_FILE_MEMORY:
2954       for (int c = 0; c < 4; ++c) {
2955          if (!dst0[c])
2956             continue;
2957 
2958          Instruction *insn;
2959          Value *off = fetchSrc(1, c);
2960          Value *sym;
2961          if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE)
2962             sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c,
2963                           tgsi.getSrc(1).getValueU32(c, info));
2964          else
2965             sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, 0);
2966          if (subOp == NV50_IR_SUBOP_ATOM_CAS)
2967             insn = mkOp3(OP_ATOM, ty, dst, sym, fetchSrc(2, c), fetchSrc(3, c));
2968          else
2969             insn = mkOp2(OP_ATOM, ty, dst, sym, fetchSrc(2, c));
2970          if (tgsi.getSrc(1).getFile() != TGSI_FILE_IMMEDIATE)
2971             insn->setIndirect(0, 0, off);
2972          if (ind)
2973             insn->setIndirect(0, 1, ind);
2974          insn->subOp = subOp;
2975       }
2976       for (int c = 0; c < 4; ++c)
2977          if (dst0[c])
2978             dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
2979       break;
2980    default: {
2981       getImageCoords(srcv, 1);
2982       defv.push_back(dst);
2983       srcv.push_back(fetchSrc(2, 0));
2984 
2985       if (subOp == NV50_IR_SUBOP_ATOM_CAS)
2986          srcv.push_back(fetchSrc(3, 0));
2987 
2988       bool bindless = tgsi.getSrc(0).getFile() != TGSI_FILE_IMAGE;
2989       if (bindless)
2990          ind = fetchSrc(0, 0);
2991 
2992       TexInstruction *tex = mkTex(OP_SUREDP, tgsi.getImageTarget(),
2993                                   0, 0, defv, srcv);
2994       tex->subOp = subOp;
2995       tex->tex.mask = 1;
2996       tex->tex.format = tgsi.getImageFormat();
2997       tex->setType(ty);
2998       tex->tex.bindless = bindless;
2999       if (!bindless)
3000          tex->tex.r = r;
3001       if (ind)
3002          tex->setIndirectR(ind);
3003 
3004       for (int c = 0; c < 4; ++c)
3005          if (dst0[c])
3006             dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
3007       break;
3008    }
3009    }
3010 
3011 /* Keep this around for now as reference when adding img support
3012    getResourceCoords(srcv, r, 1);
3013 
3014    if (isResourceSpecial(r)) {
3015       assert(r != TGSI_RESOURCE_INPUT);
3016       Instruction *insn;
3017       insn = mkOp2(OP_ATOM, ty, dst, getResourceBase(r), fetchSrc(2, 0));
3018       insn->subOp = subOp;
3019       if (subOp == NV50_IR_SUBOP_ATOM_CAS)
3020          insn->setSrc(2, fetchSrc(3, 0));
3021       insn->setIndirect(0, 0, srcv.at(0));
3022    } else {
3023       operation op = isResourceRaw(code, r) ? OP_SUREDB : OP_SUREDP;
3024       TexTarget targ = getResourceTarget(code, r);
3025       int idx = code->resources[r].slot;
3026       defv.push_back(dst);
3027       srcv.push_back(fetchSrc(2, 0));
3028       if (subOp == NV50_IR_SUBOP_ATOM_CAS)
3029          srcv.push_back(fetchSrc(3, 0));
3030       TexInstruction *tex = mkTex(op, targ, idx, 0, defv, srcv);
3031       tex->subOp = subOp;
3032       tex->tex.mask = 1;
3033       tex->setType(ty);
3034    }
3035 
3036    for (int c = 0; c < 4; ++c)
3037       if (dst0[c])
3038          dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
3039 */
3040 }
3041 
3042 void
handleINTERP(Value * dst[4])3043 Converter::handleINTERP(Value *dst[4])
3044 {
3045    // Check whether the input is linear. All other attributes ignored.
3046    Instruction *insn;
3047    Value *offset = NULL, *ptr = NULL, *w = NULL;
3048    Symbol *sym[4] = { NULL };
3049    bool linear;
3050    operation op = OP_NOP;
3051    int c, mode = 0;
3052 
3053    tgsi::Instruction::SrcRegister src = tgsi.getSrc(0);
3054 
3055    // In some odd cases, in large part due to varying packing, the source
3056    // might not actually be an input. This is illegal TGSI, but it's easier to
3057    // account for it here than it is to fix it where the TGSI is being
3058    // generated. In that case, it's going to be a straight up mov (or sequence
3059    // of mov's) from the input in question. We follow the mov chain to see
3060    // which input we need to use.
3061    if (src.getFile() != TGSI_FILE_INPUT) {
3062       if (src.isIndirect(0)) {
3063          ERROR("Ignoring indirect input interpolation\n");
3064          return;
3065       }
3066       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3067          Value *val = fetchSrc(0, c);
3068          assert(val->defs.size() == 1);
3069          insn = val->getInsn();
3070          while (insn->op == OP_MOV) {
3071             assert(insn->getSrc(0)->defs.size() == 1);
3072             insn = insn->getSrc(0)->getInsn();
3073             if (!insn) {
3074                ERROR("Miscompiling shader due to unhandled INTERP\n");
3075                return;
3076             }
3077          }
3078          if (insn->op != OP_LINTERP && insn->op != OP_PINTERP) {
3079             ERROR("Trying to interpolate non-input, this is not allowed.\n");
3080             return;
3081          }
3082          sym[c] = insn->getSrc(0)->asSym();
3083          assert(sym[c]);
3084          op = insn->op;
3085          mode = insn->ipa;
3086       }
3087    } else {
3088       if (src.isIndirect(0))
3089          ptr = fetchSrc(src.getIndirect(0), 0, NULL);
3090 
3091       // We can assume that the fixed index will point to an input of the same
3092       // interpolation type in case of an indirect.
3093       // TODO: Make use of ArrayID.
3094       linear = info->in[src.getIndex(0)].linear;
3095       if (linear) {
3096          op = OP_LINTERP;
3097          mode = NV50_IR_INTERP_LINEAR;
3098       } else {
3099          op = OP_PINTERP;
3100          mode = NV50_IR_INTERP_PERSPECTIVE;
3101       }
3102    }
3103 
3104    switch (tgsi.getOpcode()) {
3105    case TGSI_OPCODE_INTERP_CENTROID:
3106       mode |= NV50_IR_INTERP_CENTROID;
3107       break;
3108    case TGSI_OPCODE_INTERP_SAMPLE:
3109       insn = mkOp1(OP_PIXLD, TYPE_U32, (offset = getScratch()), fetchSrc(1, 0));
3110       insn->subOp = NV50_IR_SUBOP_PIXLD_OFFSET;
3111       mode |= NV50_IR_INTERP_OFFSET;
3112       break;
3113    case TGSI_OPCODE_INTERP_OFFSET: {
3114       // The input in src1.xy is float, but we need a single 32-bit value
3115       // where the upper and lower 16 bits are encoded in S0.12 format. We need
3116       // to clamp the input coordinates to (-0.5, 0.4375), multiply by 4096,
3117       // and then convert to s32.
3118       Value *offs[2];
3119       for (c = 0; c < 2; c++) {
3120          offs[c] = getScratch();
3121          mkOp2(OP_MIN, TYPE_F32, offs[c], fetchSrc(1, c), loadImm(NULL, 0.4375f));
3122          mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f));
3123          mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f));
3124          mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]);
3125       }
3126       offset = mkOp3v(OP_INSBF, TYPE_U32, getScratch(),
3127                       offs[1], mkImm(0x1010), offs[0]);
3128       mode |= NV50_IR_INTERP_OFFSET;
3129       break;
3130    }
3131    }
3132 
3133    if (op == OP_PINTERP) {
3134       if (offset) {
3135          w = mkOp2v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 3), offset);
3136          mkOp1(OP_RCP, TYPE_F32, w, w);
3137       } else {
3138          w = fragCoord[3];
3139       }
3140    }
3141 
3142 
3143    FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3144       insn = mkOp1(op, TYPE_F32, dst[c], sym[c] ? sym[c] : srcToSym(src, c));
3145       if (op == OP_PINTERP)
3146          insn->setSrc(1, w);
3147       if (ptr)
3148          insn->setIndirect(0, 0, ptr);
3149       if (offset)
3150          insn->setSrc(op == OP_PINTERP ? 2 : 1, offset);
3151 
3152       insn->setInterpolate(mode);
3153    }
3154 }
3155 
3156 Converter::Subroutine *
getSubroutine(unsigned ip)3157 Converter::getSubroutine(unsigned ip)
3158 {
3159    std::map<unsigned, Subroutine>::iterator it = sub.map.find(ip);
3160 
3161    if (it == sub.map.end())
3162       it = sub.map.insert(std::make_pair(
3163               ip, Subroutine(new Function(prog, "SUB", ip)))).first;
3164 
3165    return &it->second;
3166 }
3167 
3168 Converter::Subroutine *
getSubroutine(Function * f)3169 Converter::getSubroutine(Function *f)
3170 {
3171    unsigned ip = f->getLabel();
3172    std::map<unsigned, Subroutine>::iterator it = sub.map.find(ip);
3173 
3174    if (it == sub.map.end())
3175       it = sub.map.insert(std::make_pair(ip, Subroutine(f))).first;
3176 
3177    return &it->second;
3178 }
3179 
3180 bool
isEndOfSubroutine(uint ip)3181 Converter::isEndOfSubroutine(uint ip)
3182 {
3183    assert(ip < code->scan.num_instructions);
3184    tgsi::Instruction insn(&code->insns[ip]);
3185    return (insn.getOpcode() == TGSI_OPCODE_END ||
3186            insn.getOpcode() == TGSI_OPCODE_ENDSUB ||
3187            // does END occur at end of main or the very end ?
3188            insn.getOpcode() == TGSI_OPCODE_BGNSUB);
3189 }
3190 
3191 bool
handleInstruction(const struct tgsi_full_instruction * insn)3192 Converter::handleInstruction(const struct tgsi_full_instruction *insn)
3193 {
3194    Instruction *geni;
3195 
3196    Value *dst0[4], *rDst0[4];
3197    Value *src0, *src1, *src2, *src3;
3198    Value *val0, *val1;
3199    int c;
3200 
3201    tgsi = tgsi::Instruction(insn);
3202 
3203    bool useScratchDst = tgsi.checkDstSrcAliasing();
3204 
3205    operation op = tgsi.getOP();
3206    dstTy = tgsi.inferDstType();
3207    srcTy = tgsi.inferSrcType();
3208 
3209    unsigned int mask = tgsi.dstCount() ? tgsi.getDst(0).getMask() : 0;
3210 
3211    if (tgsi.dstCount() && tgsi.getOpcode() != TGSI_OPCODE_STORE) {
3212       for (c = 0; c < 4; ++c) {
3213          rDst0[c] = acquireDst(0, c);
3214          dst0[c] = (useScratchDst && rDst0[c]) ? getScratch() : rDst0[c];
3215       }
3216    }
3217 
3218    switch (tgsi.getOpcode()) {
3219    case TGSI_OPCODE_ADD:
3220    case TGSI_OPCODE_UADD:
3221    case TGSI_OPCODE_AND:
3222    case TGSI_OPCODE_DIV:
3223    case TGSI_OPCODE_IDIV:
3224    case TGSI_OPCODE_UDIV:
3225    case TGSI_OPCODE_MAX:
3226    case TGSI_OPCODE_MIN:
3227    case TGSI_OPCODE_IMAX:
3228    case TGSI_OPCODE_IMIN:
3229    case TGSI_OPCODE_UMAX:
3230    case TGSI_OPCODE_UMIN:
3231    case TGSI_OPCODE_MOD:
3232    case TGSI_OPCODE_UMOD:
3233    case TGSI_OPCODE_MUL:
3234    case TGSI_OPCODE_UMUL:
3235    case TGSI_OPCODE_IMUL_HI:
3236    case TGSI_OPCODE_UMUL_HI:
3237    case TGSI_OPCODE_OR:
3238    case TGSI_OPCODE_SHL:
3239    case TGSI_OPCODE_ISHR:
3240    case TGSI_OPCODE_USHR:
3241    case TGSI_OPCODE_XOR:
3242       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3243          src0 = fetchSrc(0, c);
3244          src1 = fetchSrc(1, c);
3245          geni = mkOp2(op, dstTy, dst0[c], src0, src1);
3246          geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode());
3247          if (op == OP_MUL && dstTy == TYPE_F32)
3248             geni->dnz = info->io.mul_zero_wins;
3249          geni->precise = insn->Instruction.Precise;
3250       }
3251       break;
3252    case TGSI_OPCODE_MAD:
3253    case TGSI_OPCODE_UMAD:
3254    case TGSI_OPCODE_FMA:
3255       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3256          src0 = fetchSrc(0, c);
3257          src1 = fetchSrc(1, c);
3258          src2 = fetchSrc(2, c);
3259          geni = mkOp3(op, dstTy, dst0[c], src0, src1, src2);
3260          if (dstTy == TYPE_F32)
3261             geni->dnz = info->io.mul_zero_wins;
3262          geni->precise = insn->Instruction.Precise;
3263       }
3264       break;
3265    case TGSI_OPCODE_MOV:
3266    case TGSI_OPCODE_CEIL:
3267    case TGSI_OPCODE_FLR:
3268    case TGSI_OPCODE_TRUNC:
3269    case TGSI_OPCODE_RCP:
3270    case TGSI_OPCODE_SQRT:
3271    case TGSI_OPCODE_IABS:
3272    case TGSI_OPCODE_INEG:
3273    case TGSI_OPCODE_NOT:
3274    case TGSI_OPCODE_DDX:
3275    case TGSI_OPCODE_DDY:
3276    case TGSI_OPCODE_DDX_FINE:
3277    case TGSI_OPCODE_DDY_FINE:
3278       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3279          mkOp1(op, dstTy, dst0[c], fetchSrc(0, c));
3280       break;
3281    case TGSI_OPCODE_RSQ:
3282       src0 = fetchSrc(0, 0);
3283       val0 = getScratch();
3284       mkOp1(OP_ABS, TYPE_F32, val0, src0);
3285       mkOp1(OP_RSQ, TYPE_F32, val0, val0);
3286       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3287          mkMov(dst0[c], val0);
3288       break;
3289    case TGSI_OPCODE_ARL:
3290    case TGSI_OPCODE_ARR:
3291       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3292          const RoundMode rnd =
3293             tgsi.getOpcode() == TGSI_OPCODE_ARR ? ROUND_N : ROUND_M;
3294          src0 = fetchSrc(0, c);
3295          mkCvt(OP_CVT, TYPE_S32, dst0[c], TYPE_F32, src0)->rnd = rnd;
3296       }
3297       break;
3298    case TGSI_OPCODE_UARL:
3299       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3300          mkOp1(OP_MOV, TYPE_U32, dst0[c], fetchSrc(0, c));
3301       break;
3302    case TGSI_OPCODE_POW:
3303       val0 = mkOp2v(op, TYPE_F32, getScratch(), fetchSrc(0, 0), fetchSrc(1, 0));
3304       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3305          mkOp1(OP_MOV, TYPE_F32, dst0[c], val0);
3306       break;
3307    case TGSI_OPCODE_EX2:
3308    case TGSI_OPCODE_LG2:
3309       val0 = mkOp1(op, TYPE_F32, getScratch(), fetchSrc(0, 0))->getDef(0);
3310       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3311          mkOp1(OP_MOV, TYPE_F32, dst0[c], val0);
3312       break;
3313    case TGSI_OPCODE_COS:
3314    case TGSI_OPCODE_SIN:
3315       val0 = getScratch();
3316       if (mask & 7) {
3317          mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 0));
3318          mkOp1(op, TYPE_F32, val0, val0);
3319          for (c = 0; c < 3; ++c)
3320             if (dst0[c])
3321                mkMov(dst0[c], val0);
3322       }
3323       if (dst0[3]) {
3324          mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 3));
3325          mkOp1(op, TYPE_F32, dst0[3], val0);
3326       }
3327       break;
3328    case TGSI_OPCODE_EXP:
3329       src0 = fetchSrc(0, 0);
3330       val0 = mkOp1v(OP_FLOOR, TYPE_F32, getSSA(), src0);
3331       if (dst0[1])
3332          mkOp2(OP_SUB, TYPE_F32, dst0[1], src0, val0);
3333       if (dst0[0])
3334          mkOp1(OP_EX2, TYPE_F32, dst0[0], val0);
3335       if (dst0[2])
3336          mkOp1(OP_EX2, TYPE_F32, dst0[2], src0);
3337       if (dst0[3])
3338          loadImm(dst0[3], 1.0f);
3339       break;
3340    case TGSI_OPCODE_LOG:
3341       src0 = mkOp1v(OP_ABS, TYPE_F32, getSSA(), fetchSrc(0, 0));
3342       val0 = mkOp1v(OP_LG2, TYPE_F32, dst0[2] ? dst0[2] : getSSA(), src0);
3343       if (dst0[0] || dst0[1])
3344          val1 = mkOp1v(OP_FLOOR, TYPE_F32, dst0[0] ? dst0[0] : getSSA(), val0);
3345       if (dst0[1]) {
3346          mkOp1(OP_EX2, TYPE_F32, dst0[1], val1);
3347          mkOp1(OP_RCP, TYPE_F32, dst0[1], dst0[1]);
3348          mkOp2(OP_MUL, TYPE_F32, dst0[1], dst0[1], src0)
3349             ->dnz = info->io.mul_zero_wins;
3350       }
3351       if (dst0[3])
3352          loadImm(dst0[3], 1.0f);
3353       break;
3354    case TGSI_OPCODE_DP2:
3355       val0 = buildDot(2);
3356       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3357          mkMov(dst0[c], val0);
3358       break;
3359    case TGSI_OPCODE_DP3:
3360       val0 = buildDot(3);
3361       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3362          mkMov(dst0[c], val0);
3363       break;
3364    case TGSI_OPCODE_DP4:
3365       val0 = buildDot(4);
3366       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3367          mkMov(dst0[c], val0);
3368       break;
3369    case TGSI_OPCODE_DST:
3370       if (dst0[0])
3371          loadImm(dst0[0], 1.0f);
3372       if (dst0[1]) {
3373          src0 = fetchSrc(0, 1);
3374          src1 = fetchSrc(1, 1);
3375          mkOp2(OP_MUL, TYPE_F32, dst0[1], src0, src1)
3376             ->dnz = info->io.mul_zero_wins;
3377       }
3378       if (dst0[2])
3379          mkMov(dst0[2], fetchSrc(0, 2));
3380       if (dst0[3])
3381          mkMov(dst0[3], fetchSrc(1, 3));
3382       break;
3383    case TGSI_OPCODE_LRP:
3384       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3385          src0 = fetchSrc(0, c);
3386          src1 = fetchSrc(1, c);
3387          src2 = fetchSrc(2, c);
3388          mkOp3(OP_MAD, TYPE_F32, dst0[c],
3389                mkOp2v(OP_SUB, TYPE_F32, getSSA(), src1, src2), src0, src2)
3390             ->dnz = info->io.mul_zero_wins;
3391       }
3392       break;
3393    case TGSI_OPCODE_LIT:
3394       handleLIT(dst0);
3395       break;
3396    case TGSI_OPCODE_ISSG:
3397    case TGSI_OPCODE_SSG:
3398       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3399          src0 = fetchSrc(0, c);
3400          val0 = getScratch();
3401          val1 = getScratch();
3402          mkCmp(OP_SET, CC_GT, srcTy, val0, srcTy, src0, zero);
3403          mkCmp(OP_SET, CC_LT, srcTy, val1, srcTy, src0, zero);
3404          if (srcTy == TYPE_F32)
3405             mkOp2(OP_SUB, TYPE_F32, dst0[c], val0, val1);
3406          else
3407             mkOp2(OP_SUB, TYPE_S32, dst0[c], val1, val0);
3408       }
3409       break;
3410    case TGSI_OPCODE_UCMP:
3411       srcTy = TYPE_U32;
3412       /* fallthrough */
3413    case TGSI_OPCODE_CMP:
3414       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3415          src0 = fetchSrc(0, c);
3416          src1 = fetchSrc(1, c);
3417          src2 = fetchSrc(2, c);
3418          if (src1 == src2)
3419             mkMov(dst0[c], src1);
3420          else
3421             mkCmp(OP_SLCT, (srcTy == TYPE_F32) ? CC_LT : CC_NE,
3422                   srcTy, dst0[c], srcTy, src1, src2, src0);
3423       }
3424       break;
3425    case TGSI_OPCODE_FRC:
3426       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3427          src0 = fetchSrc(0, c);
3428          val0 = getScratch();
3429          mkOp1(OP_FLOOR, TYPE_F32, val0, src0);
3430          mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val0);
3431       }
3432       break;
3433    case TGSI_OPCODE_ROUND:
3434       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3435          mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_F32, fetchSrc(0, c))
3436          ->rnd = ROUND_NI;
3437       break;
3438    case TGSI_OPCODE_SLT:
3439    case TGSI_OPCODE_SGE:
3440    case TGSI_OPCODE_SEQ:
3441    case TGSI_OPCODE_SGT:
3442    case TGSI_OPCODE_SLE:
3443    case TGSI_OPCODE_SNE:
3444    case TGSI_OPCODE_FSEQ:
3445    case TGSI_OPCODE_FSGE:
3446    case TGSI_OPCODE_FSLT:
3447    case TGSI_OPCODE_FSNE:
3448    case TGSI_OPCODE_ISGE:
3449    case TGSI_OPCODE_ISLT:
3450    case TGSI_OPCODE_USEQ:
3451    case TGSI_OPCODE_USGE:
3452    case TGSI_OPCODE_USLT:
3453    case TGSI_OPCODE_USNE:
3454       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3455          src0 = fetchSrc(0, c);
3456          src1 = fetchSrc(1, c);
3457          mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], srcTy, src0, src1);
3458       }
3459       break;
3460    case TGSI_OPCODE_VOTE_ALL:
3461    case TGSI_OPCODE_VOTE_ANY:
3462    case TGSI_OPCODE_VOTE_EQ:
3463       val0 = new_LValue(func, FILE_PREDICATE);
3464       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3465          mkCmp(OP_SET, CC_NE, TYPE_U32, val0, TYPE_U32, fetchSrc(0, c), zero);
3466          mkOp1(op, dstTy, val0, val0)
3467             ->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode());
3468          mkCvt(OP_CVT, TYPE_U32, dst0[c], TYPE_U8, val0);
3469       }
3470       break;
3471    case TGSI_OPCODE_BALLOT:
3472       if (!tgsi.getDst(0).isMasked(0)) {
3473          val0 = new_LValue(func, FILE_PREDICATE);
3474          mkCmp(OP_SET, CC_NE, TYPE_U32, val0, TYPE_U32, fetchSrc(0, 0), zero);
3475          mkOp1(op, TYPE_U32, dst0[0], val0)->subOp = NV50_IR_SUBOP_VOTE_ANY;
3476       }
3477       if (!tgsi.getDst(0).isMasked(1))
3478          mkMov(dst0[1], zero, TYPE_U32);
3479       break;
3480    case TGSI_OPCODE_READ_FIRST:
3481       // ReadFirstInvocationARB(src) is implemented as
3482       // ReadInvocationARB(src, findLSB(ballot(true)))
3483       val0 = getScratch();
3484       mkOp1(OP_VOTE, TYPE_U32, val0, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
3485       mkOp2(OP_EXTBF, TYPE_U32, val0, val0, mkImm(0x2000))
3486          ->subOp = NV50_IR_SUBOP_EXTBF_REV;
3487       mkOp1(OP_BFIND, TYPE_U32, val0, val0)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
3488       src1 = val0;
3489       /* fallthrough */
3490    case TGSI_OPCODE_READ_INVOC:
3491       if (tgsi.getOpcode() == TGSI_OPCODE_READ_INVOC)
3492          src1 = fetchSrc(1, 0);
3493       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3494          geni = mkOp3(op, dstTy, dst0[c], fetchSrc(0, c), src1, mkImm(0x1f));
3495          geni->subOp = NV50_IR_SUBOP_SHFL_IDX;
3496       }
3497       break;
3498    case TGSI_OPCODE_CLOCK:
3499       // Stick the 32-bit clock into the high dword of the logical result.
3500       if (!tgsi.getDst(0).isMasked(0))
3501          mkOp1(OP_MOV, TYPE_U32, dst0[0], zero);
3502       if (!tgsi.getDst(0).isMasked(1))
3503          mkOp1(OP_RDSV, TYPE_U32, dst0[1], mkSysVal(SV_CLOCK, 0))->fixed = 1;
3504       break;
3505    case TGSI_OPCODE_KILL_IF:
3506       val0 = new_LValue(func, FILE_PREDICATE);
3507       mask = 0;
3508       for (c = 0; c < 4; ++c) {
3509          const int s = tgsi.getSrc(0).getSwizzle(c);
3510          if (mask & (1 << s))
3511             continue;
3512          mask |= 1 << s;
3513          mkCmp(OP_SET, CC_LT, TYPE_F32, val0, TYPE_F32, fetchSrc(0, c), zero);
3514          mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, val0);
3515       }
3516       break;
3517    case TGSI_OPCODE_KILL:
3518       mkOp(OP_DISCARD, TYPE_NONE, NULL);
3519       break;
3520    case TGSI_OPCODE_TEX:
3521    case TGSI_OPCODE_TEX_LZ:
3522    case TGSI_OPCODE_TXB:
3523    case TGSI_OPCODE_TXL:
3524    case TGSI_OPCODE_TXP:
3525    case TGSI_OPCODE_LODQ:
3526       //              R  S     L     C    Dx    Dy
3527       handleTEX(dst0, 1, 1, 0x03, 0x0f, 0x00, 0x00);
3528       break;
3529    case TGSI_OPCODE_TXD:
3530       handleTEX(dst0, 3, 3, 0x03, 0x0f, 0x10, 0x20);
3531       break;
3532    case TGSI_OPCODE_TG4:
3533       handleTEX(dst0, 2, 2, 0x03, 0x0f, 0x00, 0x00);
3534       break;
3535    case TGSI_OPCODE_TEX2:
3536       handleTEX(dst0, 2, 2, 0x03, 0x10, 0x00, 0x00);
3537       break;
3538    case TGSI_OPCODE_TXB2:
3539    case TGSI_OPCODE_TXL2:
3540       handleTEX(dst0, 2, 2, 0x10, 0x0f, 0x00, 0x00);
3541       break;
3542    case TGSI_OPCODE_SAMPLE:
3543    case TGSI_OPCODE_SAMPLE_B:
3544    case TGSI_OPCODE_SAMPLE_D:
3545    case TGSI_OPCODE_SAMPLE_L:
3546    case TGSI_OPCODE_SAMPLE_C:
3547    case TGSI_OPCODE_SAMPLE_C_LZ:
3548       handleTEX(dst0, 1, 2, 0x30, 0x30, 0x30, 0x40);
3549       break;
3550    case TGSI_OPCODE_TXF_LZ:
3551    case TGSI_OPCODE_TXF:
3552       handleTXF(dst0, 1, 0x03);
3553       break;
3554    case TGSI_OPCODE_SAMPLE_I:
3555       handleTXF(dst0, 1, 0x03);
3556       break;
3557    case TGSI_OPCODE_SAMPLE_I_MS:
3558       handleTXF(dst0, 1, 0x20);
3559       break;
3560    case TGSI_OPCODE_TXQ:
3561    case TGSI_OPCODE_SVIEWINFO:
3562       handleTXQ(dst0, TXQ_DIMS, 1);
3563       break;
3564    case TGSI_OPCODE_TXQS:
3565       // The TXQ_TYPE query returns samples in its 3rd arg, but we need it to
3566       // be in .x
3567       dst0[1] = dst0[2] = dst0[3] = NULL;
3568       std::swap(dst0[0], dst0[2]);
3569       handleTXQ(dst0, TXQ_TYPE, 0);
3570       std::swap(dst0[0], dst0[2]);
3571       break;
3572    case TGSI_OPCODE_FBFETCH:
3573       handleFBFETCH(dst0);
3574       break;
3575    case TGSI_OPCODE_F2I:
3576    case TGSI_OPCODE_F2U:
3577       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3578          mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c))->rnd = ROUND_Z;
3579       break;
3580    case TGSI_OPCODE_I2F:
3581    case TGSI_OPCODE_U2F:
3582       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3583          mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c));
3584       break;
3585    case TGSI_OPCODE_PK2H:
3586       val0 = getScratch();
3587       val1 = getScratch();
3588       mkCvt(OP_CVT, TYPE_F16, val0, TYPE_F32, fetchSrc(0, 0));
3589       mkCvt(OP_CVT, TYPE_F16, val1, TYPE_F32, fetchSrc(0, 1));
3590       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
3591          mkOp3(OP_INSBF, TYPE_U32, dst0[c], val1, mkImm(0x1010), val0);
3592       break;
3593    case TGSI_OPCODE_UP2H:
3594       src0 = fetchSrc(0, 0);
3595       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3596          geni = mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_F16, src0);
3597          geni->subOp = c & 1;
3598       }
3599       break;
3600    case TGSI_OPCODE_EMIT:
3601       /* export the saved viewport index */
3602       if (viewport != NULL) {
3603          Symbol *vpSym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_U32,
3604                                   info->out[info->io.viewportId].slot[0] * 4);
3605          mkStore(OP_EXPORT, TYPE_U32, vpSym, NULL, viewport);
3606       }
3607       /* fallthrough */
3608    case TGSI_OPCODE_ENDPRIM:
3609    {
3610       // get vertex stream (must be immediate)
3611       unsigned int stream = tgsi.getSrc(0).getValueU32(0, info);
3612       if (stream && op == OP_RESTART)
3613          break;
3614       if (info->prop.gp.maxVertices == 0)
3615          break;
3616       src0 = mkImm(stream);
3617       mkOp1(op, TYPE_U32, NULL, src0)->fixed = 1;
3618       break;
3619    }
3620    case TGSI_OPCODE_IF:
3621    case TGSI_OPCODE_UIF:
3622    {
3623       BasicBlock *ifBB = new BasicBlock(func);
3624 
3625       bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
3626       condBBs.push(bb);
3627       joinBBs.push(bb);
3628 
3629       mkFlow(OP_BRA, NULL, CC_NOT_P, fetchSrc(0, 0))->setType(srcTy);
3630 
3631       setPosition(ifBB, true);
3632    }
3633       break;
3634    case TGSI_OPCODE_ELSE:
3635    {
3636       BasicBlock *elseBB = new BasicBlock(func);
3637       BasicBlock *forkBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p);
3638 
3639       forkBB->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);
3640       condBBs.push(bb);
3641 
3642       forkBB->getExit()->asFlow()->target.bb = elseBB;
3643       if (!bb->isTerminated())
3644          mkFlow(OP_BRA, NULL, CC_ALWAYS, NULL);
3645 
3646       setPosition(elseBB, true);
3647    }
3648       break;
3649    case TGSI_OPCODE_ENDIF:
3650    {
3651       BasicBlock *convBB = new BasicBlock(func);
3652       BasicBlock *prevBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p);
3653       BasicBlock *forkBB = reinterpret_cast<BasicBlock *>(joinBBs.pop().u.p);
3654 
3655       if (!bb->isTerminated()) {
3656          // we only want join if none of the clauses ended with CONT/BREAK/RET
3657          if (prevBB->getExit()->op == OP_BRA && joinBBs.getSize() < 6)
3658             insertConvergenceOps(convBB, forkBB);
3659          mkFlow(OP_BRA, convBB, CC_ALWAYS, NULL);
3660          bb->cfg.attach(&convBB->cfg, Graph::Edge::FORWARD);
3661       }
3662 
3663       if (prevBB->getExit()->op == OP_BRA) {
3664          prevBB->cfg.attach(&convBB->cfg, Graph::Edge::FORWARD);
3665          prevBB->getExit()->asFlow()->target.bb = convBB;
3666       }
3667       setPosition(convBB, true);
3668    }
3669       break;
3670    case TGSI_OPCODE_BGNLOOP:
3671    {
3672       BasicBlock *lbgnBB = new BasicBlock(func);
3673       BasicBlock *lbrkBB = new BasicBlock(func);
3674 
3675       loopBBs.push(lbgnBB);
3676       breakBBs.push(lbrkBB);
3677       if (loopBBs.getSize() > func->loopNestingBound)
3678          func->loopNestingBound++;
3679 
3680       mkFlow(OP_PREBREAK, lbrkBB, CC_ALWAYS, NULL);
3681 
3682       bb->cfg.attach(&lbgnBB->cfg, Graph::Edge::TREE);
3683       setPosition(lbgnBB, true);
3684       mkFlow(OP_PRECONT, lbgnBB, CC_ALWAYS, NULL);
3685    }
3686       break;
3687    case TGSI_OPCODE_ENDLOOP:
3688    {
3689       BasicBlock *loopBB = reinterpret_cast<BasicBlock *>(loopBBs.pop().u.p);
3690 
3691       if (!bb->isTerminated()) {
3692          mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
3693          bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
3694       }
3695       setPosition(reinterpret_cast<BasicBlock *>(breakBBs.pop().u.p), true);
3696 
3697       // If the loop never breaks (e.g. only has RET's inside), then there
3698       // will be no way to get to the break bb. However BGNLOOP will have
3699       // already made a PREBREAK to it, so it must be in the CFG.
3700       if (getBB()->cfg.incidentCount() == 0)
3701          loopBB->cfg.attach(&getBB()->cfg, Graph::Edge::TREE);
3702    }
3703       break;
3704    case TGSI_OPCODE_BRK:
3705    {
3706       if (bb->isTerminated())
3707          break;
3708       BasicBlock *brkBB = reinterpret_cast<BasicBlock *>(breakBBs.peek().u.p);
3709       mkFlow(OP_BREAK, brkBB, CC_ALWAYS, NULL);
3710       bb->cfg.attach(&brkBB->cfg, Graph::Edge::CROSS);
3711    }
3712       break;
3713    case TGSI_OPCODE_CONT:
3714    {
3715       if (bb->isTerminated())
3716          break;
3717       BasicBlock *contBB = reinterpret_cast<BasicBlock *>(loopBBs.peek().u.p);
3718       mkFlow(OP_CONT, contBB, CC_ALWAYS, NULL);
3719       contBB->explicitCont = true;
3720       bb->cfg.attach(&contBB->cfg, Graph::Edge::BACK);
3721    }
3722       break;
3723    case TGSI_OPCODE_BGNSUB:
3724    {
3725       Subroutine *s = getSubroutine(ip);
3726       BasicBlock *entry = new BasicBlock(s->f);
3727       BasicBlock *leave = new BasicBlock(s->f);
3728 
3729       // multiple entrypoints possible, keep the graph connected
3730       if (prog->getType() == Program::TYPE_COMPUTE)
3731          prog->main->call.attach(&s->f->call, Graph::Edge::TREE);
3732 
3733       sub.cur = s;
3734       s->f->setEntry(entry);
3735       s->f->setExit(leave);
3736       setPosition(entry, true);
3737       return true;
3738    }
3739    case TGSI_OPCODE_ENDSUB:
3740    {
3741       sub.cur = getSubroutine(prog->main);
3742       setPosition(BasicBlock::get(sub.cur->f->cfg.getRoot()), true);
3743       return true;
3744    }
3745    case TGSI_OPCODE_CAL:
3746    {
3747       Subroutine *s = getSubroutine(tgsi.getLabel());
3748       mkFlow(OP_CALL, s->f, CC_ALWAYS, NULL);
3749       func->call.attach(&s->f->call, Graph::Edge::TREE);
3750       return true;
3751    }
3752    case TGSI_OPCODE_RET:
3753    {
3754       if (bb->isTerminated())
3755          return true;
3756       BasicBlock *leave = BasicBlock::get(func->cfgExit);
3757 
3758       if (!isEndOfSubroutine(ip + 1)) {
3759          // insert a PRERET at the entry if this is an early return
3760          // (only needed for sharing code in the epilogue)
3761          BasicBlock *root = BasicBlock::get(func->cfg.getRoot());
3762          if (root->getEntry() == NULL || root->getEntry()->op != OP_PRERET) {
3763             BasicBlock *pos = getBB();
3764             setPosition(root, false);
3765             mkFlow(OP_PRERET, leave, CC_ALWAYS, NULL)->fixed = 1;
3766             setPosition(pos, true);
3767          }
3768       }
3769       mkFlow(OP_RET, NULL, CC_ALWAYS, NULL)->fixed = 1;
3770       bb->cfg.attach(&leave->cfg, Graph::Edge::CROSS);
3771    }
3772       break;
3773    case TGSI_OPCODE_END:
3774    {
3775       // attach and generate epilogue code
3776       BasicBlock *epilogue = BasicBlock::get(func->cfgExit);
3777       bb->cfg.attach(&epilogue->cfg, Graph::Edge::TREE);
3778       setPosition(epilogue, true);
3779       if (prog->getType() == Program::TYPE_FRAGMENT)
3780          exportOutputs();
3781       if (info->io.genUserClip > 0)
3782          handleUserClipPlanes();
3783       mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1;
3784    }
3785       break;
3786    case TGSI_OPCODE_SWITCH:
3787    case TGSI_OPCODE_CASE:
3788       ERROR("switch/case opcode encountered, should have been lowered\n");
3789       abort();
3790       break;
3791    case TGSI_OPCODE_LOAD:
3792       handleLOAD(dst0);
3793       break;
3794    case TGSI_OPCODE_STORE:
3795       handleSTORE();
3796       break;
3797    case TGSI_OPCODE_BARRIER:
3798       geni = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0));
3799       geni->fixed = 1;
3800       geni->subOp = NV50_IR_SUBOP_BAR_SYNC;
3801       break;
3802    case TGSI_OPCODE_MEMBAR:
3803    {
3804       uint32_t level = tgsi.getSrc(0).getValueU32(0, info);
3805       geni = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
3806       geni->fixed = 1;
3807       if (!(level & ~(TGSI_MEMBAR_THREAD_GROUP | TGSI_MEMBAR_SHARED)))
3808          geni->subOp = NV50_IR_SUBOP_MEMBAR(M, CTA);
3809       else
3810          geni->subOp = NV50_IR_SUBOP_MEMBAR(M, GL);
3811    }
3812       break;
3813    case TGSI_OPCODE_ATOMUADD:
3814    case TGSI_OPCODE_ATOMXCHG:
3815    case TGSI_OPCODE_ATOMCAS:
3816    case TGSI_OPCODE_ATOMAND:
3817    case TGSI_OPCODE_ATOMOR:
3818    case TGSI_OPCODE_ATOMXOR:
3819    case TGSI_OPCODE_ATOMUMIN:
3820    case TGSI_OPCODE_ATOMIMIN:
3821    case TGSI_OPCODE_ATOMUMAX:
3822    case TGSI_OPCODE_ATOMIMAX:
3823       handleATOM(dst0, dstTy, tgsi::opcodeToSubOp(tgsi.getOpcode()));
3824       break;
3825    case TGSI_OPCODE_RESQ:
3826       if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) {
3827          Value *ind = NULL;
3828          if (tgsi.getSrc(0).isIndirect(0))
3829             ind = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);
3830          geni = mkOp1(OP_BUFQ, TYPE_U32, dst0[0],
3831                       makeSym(tgsi.getSrc(0).getFile(),
3832                               tgsi.getSrc(0).getIndex(0), -1, 0, 0));
3833          if (ind)
3834             geni->setIndirect(0, 1, ind);
3835       } else {
3836          TexInstruction *texi = new_TexInstruction(func, OP_SUQ);
3837          for (int c = 0, d = 0; c < 4; ++c) {
3838             if (dst0[c]) {
3839                texi->setDef(d++, dst0[c]);
3840                texi->tex.mask |= 1 << c;
3841             }
3842          }
3843          if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMAGE) {
3844             texi->tex.r = tgsi.getSrc(0).getIndex(0);
3845             if (tgsi.getSrc(0).isIndirect(0))
3846                texi->setIndirectR(fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, NULL));
3847          } else {
3848             texi->tex.bindless = true;
3849             texi->setIndirectR(fetchSrc(0, 0));
3850          }
3851          texi->tex.target = tgsi.getImageTarget();
3852 
3853          bb->insertTail(texi);
3854       }
3855       break;
3856    case TGSI_OPCODE_IBFE:
3857    case TGSI_OPCODE_UBFE:
3858       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3859          src0 = fetchSrc(0, c);
3860          val0 = getScratch();
3861          if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE &&
3862              tgsi.getSrc(2).getFile() == TGSI_FILE_IMMEDIATE) {
3863             loadImm(val0, (tgsi.getSrc(2).getValueU32(c, info) << 8) |
3864                     tgsi.getSrc(1).getValueU32(c, info));
3865          } else {
3866             src1 = fetchSrc(1, c);
3867             src2 = fetchSrc(2, c);
3868             mkOp3(OP_INSBF, TYPE_U32, val0, src2, mkImm(0x808), src1);
3869          }
3870          mkOp2(OP_EXTBF, dstTy, dst0[c], src0, val0);
3871       }
3872       break;
3873    case TGSI_OPCODE_BFI:
3874       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3875          src0 = fetchSrc(0, c);
3876          src1 = fetchSrc(1, c);
3877          src2 = fetchSrc(2, c);
3878          src3 = fetchSrc(3, c);
3879          val0 = getScratch();
3880          mkOp3(OP_INSBF, TYPE_U32, val0, src3, mkImm(0x808), src2);
3881          mkOp3(OP_INSBF, TYPE_U32, dst0[c], src1, val0, src0);
3882       }
3883       break;
3884    case TGSI_OPCODE_LSB:
3885       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3886          src0 = fetchSrc(0, c);
3887          val0 = getScratch();
3888          geni = mkOp2(OP_EXTBF, TYPE_U32, val0, src0, mkImm(0x2000));
3889          geni->subOp = NV50_IR_SUBOP_EXTBF_REV;
3890          geni = mkOp1(OP_BFIND, TYPE_U32, dst0[c], val0);
3891          geni->subOp = NV50_IR_SUBOP_BFIND_SAMT;
3892       }
3893       break;
3894    case TGSI_OPCODE_IMSB:
3895    case TGSI_OPCODE_UMSB:
3896       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3897          src0 = fetchSrc(0, c);
3898          mkOp1(OP_BFIND, srcTy, dst0[c], src0);
3899       }
3900       break;
3901    case TGSI_OPCODE_BREV:
3902       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3903          src0 = fetchSrc(0, c);
3904          geni = mkOp2(OP_EXTBF, TYPE_U32, dst0[c], src0, mkImm(0x2000));
3905          geni->subOp = NV50_IR_SUBOP_EXTBF_REV;
3906       }
3907       break;
3908    case TGSI_OPCODE_POPC:
3909       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3910          src0 = fetchSrc(0, c);
3911          mkOp2(OP_POPCNT, TYPE_U32, dst0[c], src0, src0);
3912       }
3913       break;
3914    case TGSI_OPCODE_INTERP_CENTROID:
3915    case TGSI_OPCODE_INTERP_SAMPLE:
3916    case TGSI_OPCODE_INTERP_OFFSET:
3917       handleINTERP(dst0);
3918       break;
3919    case TGSI_OPCODE_I642F:
3920    case TGSI_OPCODE_U642F:
3921    case TGSI_OPCODE_D2I:
3922    case TGSI_OPCODE_D2U:
3923    case TGSI_OPCODE_D2F: {
3924       int pos = 0;
3925       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3926          Value *dreg = getSSA(8);
3927          src0 = fetchSrc(0, pos);
3928          src1 = fetchSrc(0, pos + 1);
3929          mkOp2(OP_MERGE, TYPE_U64, dreg, src0, src1);
3930          Instruction *cvt = mkCvt(OP_CVT, dstTy, dst0[c], srcTy, dreg);
3931          if (!isFloatType(dstTy))
3932             cvt->rnd = ROUND_Z;
3933          pos += 2;
3934       }
3935       break;
3936    }
3937    case TGSI_OPCODE_I2I64:
3938       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3939          dst0[c] = fetchSrc(0, c / 2);
3940          mkOp2(OP_SHR, TYPE_S32, dst0[c + 1], dst0[c], loadImm(NULL, 31));
3941          c++;
3942       }
3943       break;
3944    case TGSI_OPCODE_U2I64:
3945       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3946          dst0[c] = fetchSrc(0, c / 2);
3947          dst0[c + 1] = zero;
3948          c++;
3949       }
3950       break;
3951    case TGSI_OPCODE_F2I64:
3952    case TGSI_OPCODE_F2U64:
3953    case TGSI_OPCODE_I2D:
3954    case TGSI_OPCODE_U2D:
3955    case TGSI_OPCODE_F2D:
3956       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3957          Value *dreg = getSSA(8);
3958          Instruction *cvt = mkCvt(OP_CVT, dstTy, dreg, srcTy, fetchSrc(0, c / 2));
3959          if (!isFloatType(dstTy))
3960             cvt->rnd = ROUND_Z;
3961          mkSplit(&dst0[c], 4, dreg);
3962          c++;
3963       }
3964       break;
3965    case TGSI_OPCODE_D2I64:
3966    case TGSI_OPCODE_D2U64:
3967    case TGSI_OPCODE_I642D:
3968    case TGSI_OPCODE_U642D:
3969       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3970          src0 = getSSA(8);
3971          Value *dst = getSSA(8), *tmp[2];
3972          tmp[0] = fetchSrc(0, c);
3973          tmp[1] = fetchSrc(0, c + 1);
3974          mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
3975          Instruction *cvt = mkCvt(OP_CVT, dstTy, dst, srcTy, src0);
3976          if (!isFloatType(dstTy))
3977             cvt->rnd = ROUND_Z;
3978          mkSplit(&dst0[c], 4, dst);
3979          c++;
3980       }
3981       break;
3982    case TGSI_OPCODE_I64NEG:
3983       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3984          src0 = getSSA(8);
3985          Value *dst = getSSA(8), *tmp[2];
3986          tmp[0] = fetchSrc(0, c);
3987          tmp[1] = fetchSrc(0, c + 1);
3988          mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
3989          mkOp2(OP_SUB, dstTy, dst, zero, src0);
3990          mkSplit(&dst0[c], 4, dst);
3991          c++;
3992       }
3993       break;
3994    case TGSI_OPCODE_I64ABS:
3995       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
3996          src0 = getSSA(8);
3997          Value *neg = getSSA(8), *srcComp[2], *negComp[2];
3998          srcComp[0] = fetchSrc(0, c);
3999          srcComp[1] = fetchSrc(0, c + 1);
4000          mkOp2(OP_MERGE, TYPE_U64, src0, srcComp[0], srcComp[1]);
4001          mkOp2(OP_SUB, dstTy, neg, zero, src0);
4002          mkSplit(negComp, 4, neg);
4003          mkCmp(OP_SLCT, CC_LT, TYPE_S32, dst0[c], TYPE_S32,
4004                negComp[0], srcComp[0], srcComp[1]);
4005          mkCmp(OP_SLCT, CC_LT, TYPE_S32, dst0[c + 1], TYPE_S32,
4006                negComp[1], srcComp[1], srcComp[1]);
4007          c++;
4008       }
4009       break;
4010    case TGSI_OPCODE_DABS:
4011    case TGSI_OPCODE_DNEG:
4012    case TGSI_OPCODE_DRCP:
4013    case TGSI_OPCODE_DSQRT:
4014    case TGSI_OPCODE_DRSQ:
4015    case TGSI_OPCODE_DTRUNC:
4016    case TGSI_OPCODE_DCEIL:
4017    case TGSI_OPCODE_DFLR:
4018       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4019          src0 = getSSA(8);
4020          Value *dst = getSSA(8), *tmp[2];
4021          tmp[0] = fetchSrc(0, c);
4022          tmp[1] = fetchSrc(0, c + 1);
4023          mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4024          mkOp1(op, dstTy, dst, src0);
4025          mkSplit(&dst0[c], 4, dst);
4026          c++;
4027       }
4028       break;
4029    case TGSI_OPCODE_DFRAC:
4030       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4031          src0 = getSSA(8);
4032          Value *dst = getSSA(8), *tmp[2];
4033          tmp[0] = fetchSrc(0, c);
4034          tmp[1] = fetchSrc(0, c + 1);
4035          mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4036          mkOp1(OP_FLOOR, TYPE_F64, dst, src0);
4037          mkOp2(OP_SUB, TYPE_F64, dst, src0, dst);
4038          mkSplit(&dst0[c], 4, dst);
4039          c++;
4040       }
4041       break;
4042    case TGSI_OPCODE_U64SEQ:
4043    case TGSI_OPCODE_U64SNE:
4044    case TGSI_OPCODE_U64SLT:
4045    case TGSI_OPCODE_U64SGE:
4046    case TGSI_OPCODE_I64SLT:
4047    case TGSI_OPCODE_I64SGE:
4048    case TGSI_OPCODE_DSLT:
4049    case TGSI_OPCODE_DSGE:
4050    case TGSI_OPCODE_DSEQ:
4051    case TGSI_OPCODE_DSNE: {
4052       int pos = 0;
4053       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4054          Value *tmp[2];
4055 
4056          src0 = getSSA(8);
4057          src1 = getSSA(8);
4058          tmp[0] = fetchSrc(0, pos);
4059          tmp[1] = fetchSrc(0, pos + 1);
4060          mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4061          tmp[0] = fetchSrc(1, pos);
4062          tmp[1] = fetchSrc(1, pos + 1);
4063          mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]);
4064          mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], srcTy, src0, src1);
4065          pos += 2;
4066       }
4067       break;
4068    }
4069    case TGSI_OPCODE_U64MIN:
4070    case TGSI_OPCODE_U64MAX:
4071    case TGSI_OPCODE_I64MIN:
4072    case TGSI_OPCODE_I64MAX: {
4073       dstTy = isSignedIntType(dstTy) ? TYPE_S32 : TYPE_U32;
4074       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4075          Value *flag = getSSA(1, FILE_FLAGS);
4076          src0 = fetchSrc(0, c + 1);
4077          src1 = fetchSrc(1, c + 1);
4078          geni = mkOp2(op, dstTy, dst0[c + 1], src0, src1);
4079          geni->subOp = NV50_IR_SUBOP_MINMAX_HIGH;
4080          geni->setFlagsDef(1, flag);
4081 
4082          src0 = fetchSrc(0, c);
4083          src1 = fetchSrc(1, c);
4084          geni = mkOp2(op, TYPE_U32, dst0[c], src0, src1);
4085          geni->subOp = NV50_IR_SUBOP_MINMAX_LOW;
4086          geni->setFlagsSrc(2, flag);
4087 
4088          c++;
4089       }
4090       break;
4091    }
4092    case TGSI_OPCODE_U64SHL:
4093    case TGSI_OPCODE_I64SHR:
4094    case TGSI_OPCODE_U64SHR:
4095       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4096          src0 = getSSA(8);
4097          Value *dst = getSSA(8), *tmp[2];
4098          tmp[0] = fetchSrc(0, c);
4099          tmp[1] = fetchSrc(0, c + 1);
4100          mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4101          // Theoretically src1 is a 64-bit value but in practice only the low
4102          // bits matter. The IR expects this to be a 32-bit value.
4103          src1 = fetchSrc(1, c);
4104          mkOp2(op, dstTy, dst, src0, src1);
4105          mkSplit(&dst0[c], 4, dst);
4106          c++;
4107       }
4108       break;
4109    case TGSI_OPCODE_U64ADD:
4110    case TGSI_OPCODE_U64MUL:
4111    case TGSI_OPCODE_DADD:
4112    case TGSI_OPCODE_DMUL:
4113    case TGSI_OPCODE_DDIV:
4114    case TGSI_OPCODE_DMAX:
4115    case TGSI_OPCODE_DMIN:
4116       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4117          src0 = getSSA(8);
4118          src1 = getSSA(8);
4119          Value *dst = getSSA(8), *tmp[2];
4120          tmp[0] = fetchSrc(0, c);
4121          tmp[1] = fetchSrc(0, c + 1);
4122          mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4123          tmp[0] = fetchSrc(1, c);
4124          tmp[1] = fetchSrc(1, c + 1);
4125          mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]);
4126          mkOp2(op, dstTy, dst, src0, src1);
4127          mkSplit(&dst0[c], 4, dst);
4128          c++;
4129       }
4130       break;
4131    case TGSI_OPCODE_DMAD:
4132    case TGSI_OPCODE_DFMA:
4133       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4134          src0 = getSSA(8);
4135          src1 = getSSA(8);
4136          src2 = getSSA(8);
4137          Value *dst = getSSA(8), *tmp[2];
4138          tmp[0] = fetchSrc(0, c);
4139          tmp[1] = fetchSrc(0, c + 1);
4140          mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4141          tmp[0] = fetchSrc(1, c);
4142          tmp[1] = fetchSrc(1, c + 1);
4143          mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]);
4144          tmp[0] = fetchSrc(2, c);
4145          tmp[1] = fetchSrc(2, c + 1);
4146          mkOp2(OP_MERGE, TYPE_U64, src2, tmp[0], tmp[1]);
4147          mkOp3(op, dstTy, dst, src0, src1, src2);
4148          mkSplit(&dst0[c], 4, dst);
4149          c++;
4150       }
4151       break;
4152    case TGSI_OPCODE_DROUND:
4153       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4154          src0 = getSSA(8);
4155          Value *dst = getSSA(8), *tmp[2];
4156          tmp[0] = fetchSrc(0, c);
4157          tmp[1] = fetchSrc(0, c + 1);
4158          mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4159          mkCvt(OP_CVT, TYPE_F64, dst, TYPE_F64, src0)
4160          ->rnd = ROUND_NI;
4161          mkSplit(&dst0[c], 4, dst);
4162          c++;
4163       }
4164       break;
4165    case TGSI_OPCODE_DSSG:
4166       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4167          src0 = getSSA(8);
4168          Value *dst = getSSA(8), *dstF32 = getSSA(), *tmp[2];
4169          tmp[0] = fetchSrc(0, c);
4170          tmp[1] = fetchSrc(0, c + 1);
4171          mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4172 
4173          val0 = getScratch();
4174          val1 = getScratch();
4175          // The zero is wrong here since it's only 32-bit, but it works out in
4176          // the end since it gets replaced with $r63.
4177          mkCmp(OP_SET, CC_GT, TYPE_F32, val0, TYPE_F64, src0, zero);
4178          mkCmp(OP_SET, CC_LT, TYPE_F32, val1, TYPE_F64, src0, zero);
4179          mkOp2(OP_SUB, TYPE_F32, dstF32, val0, val1);
4180          mkCvt(OP_CVT, TYPE_F64, dst, TYPE_F32, dstF32);
4181          mkSplit(&dst0[c], 4, dst);
4182          c++;
4183       }
4184       break;
4185    case TGSI_OPCODE_I64SSG:
4186       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
4187          src0 = getSSA(8);
4188          Value *tmp[2];
4189          tmp[0] = fetchSrc(0, c);
4190          tmp[1] = fetchSrc(0, c + 1);
4191          mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
4192 
4193          val0 = getScratch();
4194          val1 = getScratch();
4195          mkCmp(OP_SET, CC_GT, TYPE_U32, val0, TYPE_S64, src0, zero);
4196          mkCmp(OP_SET, CC_LT, TYPE_U32, val1, TYPE_S64, src0, zero);
4197          mkOp2(OP_SUB, TYPE_S32, dst0[c], val1, val0);
4198          mkOp2(OP_SHR, TYPE_S32, dst0[c + 1], dst0[c], loadImm(0, 31));
4199          c++;
4200       }
4201       break;
4202    default:
4203       ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode());
4204       assert(0);
4205       break;
4206    }
4207 
4208    if (tgsi.dstCount() && tgsi.getOpcode() != TGSI_OPCODE_STORE) {
4209       for (c = 0; c < 4; ++c) {
4210          if (!dst0[c])
4211             continue;
4212          if (dst0[c] != rDst0[c])
4213             mkMov(rDst0[c], dst0[c]);
4214          storeDst(0, c, rDst0[c]);
4215       }
4216    }
4217    vtxBaseValid = 0;
4218 
4219    return true;
4220 }
4221 
4222 void
handleUserClipPlanes()4223 Converter::handleUserClipPlanes()
4224 {
4225    Value *res[8];
4226    int n, i, c;
4227 
4228    for (c = 0; c < 4; ++c) {
4229       for (i = 0; i < info->io.genUserClip; ++i) {
4230          Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.auxCBSlot,
4231                                 TYPE_F32, info->io.ucpBase + i * 16 + c * 4);
4232          Value *ucp = mkLoadv(TYPE_F32, sym, NULL);
4233          if (c == 0)
4234             res[i] = mkOp2v(OP_MUL, TYPE_F32, getScratch(), clipVtx[c], ucp);
4235          else
4236             mkOp3(OP_MAD, TYPE_F32, res[i], clipVtx[c], ucp, res[i]);
4237       }
4238    }
4239 
4240    const int first = info->numOutputs - (info->io.genUserClip + 3) / 4;
4241 
4242    for (i = 0; i < info->io.genUserClip; ++i) {
4243       n = i / 4 + first;
4244       c = i % 4;
4245       Symbol *sym =
4246          mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32, info->out[n].slot[c] * 4);
4247       mkStore(OP_EXPORT, TYPE_F32, sym, NULL, res[i]);
4248    }
4249 }
4250 
4251 void
exportOutputs()4252 Converter::exportOutputs()
4253 {
4254    if (info->io.alphaRefBase) {
4255       for (unsigned int i = 0; i < info->numOutputs; ++i) {
4256          if (info->out[i].sn != TGSI_SEMANTIC_COLOR ||
4257              info->out[i].si != 0)
4258             continue;
4259          const unsigned int c = 3;
4260          if (!oData.exists(sub.cur->values, i, c))
4261             continue;
4262          Value *val = oData.load(sub.cur->values, i, c, NULL);
4263          if (!val)
4264             continue;
4265 
4266          Symbol *ref = mkSymbol(FILE_MEMORY_CONST, info->io.auxCBSlot,
4267                                 TYPE_U32, info->io.alphaRefBase);
4268          Value *pred = new_LValue(func, FILE_PREDICATE);
4269          mkCmp(OP_SET, CC_TR, TYPE_U32, pred, TYPE_F32, val,
4270                mkLoadv(TYPE_U32, ref, NULL))
4271             ->subOp = 1;
4272          mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_NOT_P, pred);
4273       }
4274    }
4275 
4276    for (unsigned int i = 0; i < info->numOutputs; ++i) {
4277       for (unsigned int c = 0; c < 4; ++c) {
4278          if (!oData.exists(sub.cur->values, i, c))
4279             continue;
4280          Symbol *sym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32,
4281                                 info->out[i].slot[c] * 4);
4282          Value *val = oData.load(sub.cur->values, i, c, NULL);
4283          if (val) {
4284             if (info->out[i].sn == TGSI_SEMANTIC_POSITION)
4285                mkOp1(OP_SAT, TYPE_F32, val, val);
4286             mkStore(OP_EXPORT, TYPE_F32, sym, NULL, val);
4287          }
4288       }
4289    }
4290 }
4291 
Converter(Program * ir,const tgsi::Source * code)4292 Converter::Converter(Program *ir, const tgsi::Source *code) : BuildUtil(ir),
4293      code(code),
4294      tgsi(NULL),
4295      tData(this), lData(this), aData(this), oData(this)
4296 {
4297    info = code->info;
4298 
4299    const unsigned tSize = code->fileSize(TGSI_FILE_TEMPORARY);
4300    const unsigned aSize = code->fileSize(TGSI_FILE_ADDRESS);
4301    const unsigned oSize = code->fileSize(TGSI_FILE_OUTPUT);
4302 
4303    tData.setup(TGSI_FILE_TEMPORARY, 0, 0, tSize, 4, 4, FILE_GPR, 0);
4304    lData.setup(TGSI_FILE_TEMPORARY, 1, 0, tSize, 4, 4, FILE_MEMORY_LOCAL, 0);
4305    aData.setup(TGSI_FILE_ADDRESS, 0, 0, aSize, 4, 4, FILE_GPR, 0);
4306    oData.setup(TGSI_FILE_OUTPUT, 0, 0, oSize, 4, 4, FILE_GPR, 0);
4307 
4308    zero = mkImm((uint32_t)0);
4309 
4310    vtxBaseValid = 0;
4311 }
4312 
~Converter()4313 Converter::~Converter()
4314 {
4315 }
4316 
4317 inline const Converter::Location *
getValueLocation(Subroutine * s,Value * v)4318 Converter::BindArgumentsPass::getValueLocation(Subroutine *s, Value *v)
4319 {
4320    ValueMap::l_iterator it = s->values.l.find(v);
4321    return it == s->values.l.end() ? NULL : &it->second;
4322 }
4323 
4324 template<typename T> inline void
updateCallArgs(Instruction * i,void (Instruction::* setArg)(int,Value *),T (Function::* proto))4325 Converter::BindArgumentsPass::updateCallArgs(
4326    Instruction *i, void (Instruction::*setArg)(int, Value *),
4327    T (Function::*proto))
4328 {
4329    Function *g = i->asFlow()->target.fn;
4330    Subroutine *subg = conv.getSubroutine(g);
4331 
4332    for (unsigned a = 0; a < (g->*proto).size(); ++a) {
4333       Value *v = (g->*proto)[a].get();
4334       const Converter::Location &l = *getValueLocation(subg, v);
4335       Converter::DataArray *array = conv.getArrayForFile(l.array, l.arrayIdx);
4336 
4337       (i->*setArg)(a, array->acquire(sub->values, l.i, l.c));
4338    }
4339 }
4340 
4341 template<typename T> inline void
updatePrototype(BitSet * set,void (Function::* updateSet)(),T (Function::* proto))4342 Converter::BindArgumentsPass::updatePrototype(
4343    BitSet *set, void (Function::*updateSet)(), T (Function::*proto))
4344 {
4345    (func->*updateSet)();
4346 
4347    for (unsigned i = 0; i < set->getSize(); ++i) {
4348       Value *v = func->getLValue(i);
4349       const Converter::Location *l = getValueLocation(sub, v);
4350 
4351       // only include values with a matching TGSI register
4352       if (set->test(i) && l && !conv.code->locals.count(*l))
4353          (func->*proto).push_back(v);
4354    }
4355 }
4356 
4357 bool
visit(Function * f)4358 Converter::BindArgumentsPass::visit(Function *f)
4359 {
4360    sub = conv.getSubroutine(f);
4361 
4362    for (ArrayList::Iterator bi = f->allBBlocks.iterator();
4363         !bi.end(); bi.next()) {
4364       for (Instruction *i = BasicBlock::get(bi)->getFirst();
4365            i; i = i->next) {
4366          if (i->op == OP_CALL && !i->asFlow()->builtin) {
4367             updateCallArgs(i, &Instruction::setSrc, &Function::ins);
4368             updateCallArgs(i, &Instruction::setDef, &Function::outs);
4369          }
4370       }
4371    }
4372 
4373    if (func == prog->main && prog->getType() != Program::TYPE_COMPUTE)
4374       return true;
4375    updatePrototype(&BasicBlock::get(f->cfg.getRoot())->liveSet,
4376                    &Function::buildLiveSets, &Function::ins);
4377    updatePrototype(&BasicBlock::get(f->cfgExit)->defSet,
4378                    &Function::buildDefSets, &Function::outs);
4379 
4380    return true;
4381 }
4382 
4383 bool
run()4384 Converter::run()
4385 {
4386    BasicBlock *entry = new BasicBlock(prog->main);
4387    BasicBlock *leave = new BasicBlock(prog->main);
4388 
4389    prog->main->setEntry(entry);
4390    prog->main->setExit(leave);
4391 
4392    setPosition(entry, true);
4393    sub.cur = getSubroutine(prog->main);
4394 
4395    if (info->io.genUserClip > 0) {
4396       for (int c = 0; c < 4; ++c)
4397          clipVtx[c] = getScratch();
4398    }
4399 
4400    switch (prog->getType()) {
4401    case Program::TYPE_TESSELLATION_CONTROL:
4402       outBase = mkOp2v(
4403          OP_SUB, TYPE_U32, getSSA(),
4404          mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LANEID, 0)),
4405          mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)));
4406       break;
4407    case Program::TYPE_FRAGMENT: {
4408       Symbol *sv = mkSysVal(SV_POSITION, 3);
4409       fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
4410       mkOp1(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
4411       break;
4412    }
4413    default:
4414       break;
4415    }
4416 
4417    if (info->io.viewportId >= 0)
4418       viewport = getScratch();
4419    else
4420       viewport = NULL;
4421 
4422    for (ip = 0; ip < code->scan.num_instructions; ++ip) {
4423       if (!handleInstruction(&code->insns[ip]))
4424          return false;
4425    }
4426 
4427    if (!BindArgumentsPass(*this).run(prog))
4428       return false;
4429 
4430    return true;
4431 }
4432 
4433 } // unnamed namespace
4434 
4435 namespace nv50_ir {
4436 
4437 bool
makeFromTGSI(struct nv50_ir_prog_info * info)4438 Program::makeFromTGSI(struct nv50_ir_prog_info *info)
4439 {
4440    tgsi::Source src(info);
4441    if (!src.scanSource())
4442       return false;
4443    tlsSize = info->bin.tlsSpace;
4444 
4445    Converter builder(this, &src);
4446    return builder.run();
4447 }
4448 
4449 } // namespace nv50_ir
4450