/*
 * Copyright 2011 Christoph Bumiller
 *           2014 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
23 
24 #include "codegen/nv50_ir.h"
25 #include "codegen/nv50_ir_build_util.h"
26 
27 #include "codegen/nv50_ir_target_nvc0.h"
28 #include "codegen/nv50_ir_lowering_gm107.h"
29 
30 #include <limits>
31 
32 namespace nv50_ir {
33 
// 2-bit operation selectors that QUADOP() packs into a quad-op sub-opcode.
#define QOP_ADD  0
#define QOP_SUBR 1
#define QOP_SUB  2
#define QOP_MOV2 3

// Build an 8-bit OP_QUADOP sub-opcode from four selector names (given without
// the QOP_ prefix). Each selector occupies two bits; the first argument lands
// in bits 7:6 and the last in bits 1:0. Argument order follows the legend
// below (presumably upper-left, upper-right, lower-left, lower-right lane).
//             UL UR LL LR
#define QUADOP(q, r, s, t)                      \
   ((QOP_##q << 6) | (QOP_##r << 4) |           \
    (QOP_##s << 2) | (QOP_##t << 0))
43 
44 void
handlePFETCH(Instruction * i)45 GM107LegalizeSSA::handlePFETCH(Instruction *i)
46 {
47    Value *src0;
48 
49    if (i->src(0).getFile() == FILE_GPR && !i->srcExists(1))
50       return;
51 
52    bld.setPosition(i, false);
53    src0 = bld.getSSA();
54 
55    if (i->srcExists(1))
56       bld.mkOp2(OP_ADD , TYPE_U32, src0, i->getSrc(0), i->getSrc(1));
57    else
58       bld.mkOp1(OP_MOV , TYPE_U32, src0, i->getSrc(0));
59 
60    i->setSrc(0, src0);
61    i->setSrc(1, NULL);
62 }
63 
64 void
handleLOAD(Instruction * i)65 GM107LegalizeSSA::handleLOAD(Instruction *i)
66 {
67    if (i->src(0).getFile() != FILE_MEMORY_CONST)
68       return;
69    if (i->src(0).isIndirect(0))
70       return;
71    if (typeSizeof(i->dType) != 4)
72       return;
73 
74    i->op = OP_MOV;
75 }
76 
77 bool
visit(Instruction * i)78 GM107LegalizeSSA::visit(Instruction *i)
79 {
80    switch (i->op) {
81    case OP_PFETCH:
82       handlePFETCH(i);
83       break;
84    case OP_LOAD:
85       handleLOAD(i);
86       break;
87    default:
88       break;
89    }
90    return true;
91 }
92 
93 bool
handleManualTXD(TexInstruction * i)94 GM107LoweringPass::handleManualTXD(TexInstruction *i)
95 {
96    static const uint8_t qOps[4][2] =
97    {
98       { QUADOP(MOV2, ADD,  MOV2, ADD),  QUADOP(MOV2, MOV2, ADD,  ADD) }, // l0
99       { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(MOV2, MOV2, ADD,  ADD) }, // l1
100       { QUADOP(MOV2, ADD,  MOV2, ADD),  QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l2
101       { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l3
102    };
103    Value *def[4][4];
104    Value *crd[3];
105    Value *tmp;
106    Instruction *tex, *add;
107    Value *zero = bld.loadImm(bld.getSSA(), 0);
108    int l, c;
109    const int dim = i->tex.target.getDim() + i->tex.target.isCube();
110    const int array = i->tex.target.isArray();
111 
112    i->op = OP_TEX; // no need to clone dPdx/dPdy later
113 
114    for (c = 0; c < dim; ++c)
115       crd[c] = bld.getScratch();
116    tmp = bld.getScratch();
117 
118    for (l = 0; l < 4; ++l) {
119       Value *src[3], *val;
120       // mov coordinates from lane l to all lanes
121       bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
122       for (c = 0; c < dim; ++c) {
123          bld.mkOp2(OP_SHFL, TYPE_F32, crd[c], i->getSrc(c + array), bld.mkImm(l));
124          add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], crd[c], zero);
125          add->subOp = 0x00;
126          add->lanes = 1; /* abused for .ndv */
127       }
128 
129       // add dPdx from lane l to lanes dx
130       for (c = 0; c < dim; ++c) {
131          bld.mkOp2(OP_SHFL, TYPE_F32, tmp, i->dPdx[c].get(), bld.mkImm(l));
132          add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], tmp, crd[c]);
133          add->subOp = qOps[l][0];
134          add->lanes = 1; /* abused for .ndv */
135       }
136 
137       // add dPdy from lane l to lanes dy
138       for (c = 0; c < dim; ++c) {
139          bld.mkOp2(OP_SHFL, TYPE_F32, tmp, i->dPdy[c].get(), bld.mkImm(l));
140          add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], tmp, crd[c]);
141          add->subOp = qOps[l][1];
142          add->lanes = 1; /* abused for .ndv */
143       }
144 
145       // normalize cube coordinates if necessary
146       if (i->tex.target.isCube()) {
147          for (c = 0; c < 3; ++c)
148             src[c] = bld.mkOp1v(OP_ABS, TYPE_F32, bld.getSSA(), crd[c]);
149          val = bld.getScratch();
150          bld.mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]);
151          bld.mkOp2(OP_MAX, TYPE_F32, val, src[2], val);
152          bld.mkOp1(OP_RCP, TYPE_F32, val, val);
153          for (c = 0; c < 3; ++c)
154             src[c] = bld.mkOp2v(OP_MUL, TYPE_F32, bld.getSSA(), crd[c], val);
155       } else {
156          for (c = 0; c < dim; ++c)
157             src[c] = crd[c];
158       }
159 
160       // texture
161       bld.insert(tex = cloneForward(func, i));
162       for (c = 0; c < dim; ++c)
163          tex->setSrc(c + array, src[c]);
164       bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL);
165 
166       // save results
167       for (c = 0; i->defExists(c); ++c) {
168          Instruction *mov;
169          def[c][l] = bld.getSSA();
170          mov = bld.mkMov(def[c][l], tex->getDef(c));
171          mov->fixed = 1;
172          mov->lanes = 1 << l;
173       }
174    }
175 
176    for (c = 0; i->defExists(c); ++c) {
177       Instruction *u = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(c));
178       for (l = 0; l < 4; ++l)
179          u->setSrc(l, def[c][l]);
180    }
181 
182    i->bb->remove(i);
183    return true;
184 }
185 
186 bool
handleDFDX(Instruction * insn)187 GM107LoweringPass::handleDFDX(Instruction *insn)
188 {
189    Instruction *shfl;
190    int qop = 0, xid = 0;
191 
192    switch (insn->op) {
193    case OP_DFDX:
194       qop = QUADOP(SUB, SUBR, SUB, SUBR);
195       xid = 1;
196       break;
197    case OP_DFDY:
198       qop = QUADOP(SUB, SUB, SUBR, SUBR);
199       xid = 2;
200       break;
201    default:
202       assert(!"invalid dfdx opcode");
203       break;
204    }
205 
206    shfl = bld.mkOp2(OP_SHFL, TYPE_F32, bld.getScratch(),
207                     insn->getSrc(0), bld.mkImm(xid));
208    shfl->subOp = NV50_IR_SUBOP_SHFL_BFLY;
209    insn->op = OP_QUADOP;
210    insn->subOp = qop;
211    insn->lanes = 0; /* abused for !.ndv */
212    insn->setSrc(1, insn->getSrc(0));
213    insn->setSrc(0, shfl->getDef(0));
214    return true;
215 }
216 
217 bool
handlePFETCH(Instruction * i)218 GM107LoweringPass::handlePFETCH(Instruction *i)
219 {
220    Value *tmp0 = bld.getScratch();
221    Value *tmp1 = bld.getScratch();
222    Value *tmp2 = bld.getScratch();
223    bld.mkOp1(OP_RDSV, TYPE_U32, tmp0, bld.mkSysVal(SV_INVOCATION_INFO, 0));
224    bld.mkOp2(OP_SHR , TYPE_U32, tmp1, tmp0, bld.mkImm(16));
225    bld.mkOp2(OP_AND , TYPE_U32, tmp0, tmp0, bld.mkImm(0xff));
226    bld.mkOp2(OP_AND , TYPE_U32, tmp1, tmp1, bld.mkImm(0xff));
227    if (i->getSrc(1))
228       bld.mkOp2(OP_ADD , TYPE_U32, tmp2, i->getSrc(0), i->getSrc(1));
229    else
230       bld.mkOp1(OP_MOV , TYPE_U32, tmp2, i->getSrc(0));
231    bld.mkOp3(OP_MAD , TYPE_U32, tmp0, tmp0, tmp1, tmp2);
232    i->setSrc(0, tmp0);
233    i->setSrc(1, NULL);
234    return true;
235 }
236 
237 bool
handlePOPCNT(Instruction * i)238 GM107LoweringPass::handlePOPCNT(Instruction *i)
239 {
240    Value *tmp = bld.mkOp2v(OP_AND, i->sType, bld.getScratch(),
241                            i->getSrc(0), i->getSrc(1));
242    i->setSrc(0, tmp);
243    i->setSrc(1, NULL);
244    return true;
245 }
246 
247 //
248 // - add quadop dance for texturing
249 // - put FP outputs in GPRs
250 // - convert instruction sequences
251 //
252 bool
visit(Instruction * i)253 GM107LoweringPass::visit(Instruction *i)
254 {
255    bld.setPosition(i, false);
256 
257    if (i->cc != CC_ALWAYS)
258       checkPredicate(i);
259 
260    switch (i->op) {
261    case OP_PFETCH:
262       return handlePFETCH(i);
263    case OP_DFDX:
264    case OP_DFDY:
265       return handleDFDX(i);
266    case OP_POPCNT:
267       return handlePOPCNT(i);
268    default:
269       return NVC0LoweringPass::visit(i);
270    }
271 }
272 
273 } // namespace nv50_ir
274