1 /*
2 * Copyright 2011 Christoph Bumiller
3 * 2014 Red Hat Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "codegen/nv50_ir_target_gm107.h"
25 #include "codegen/nv50_ir_lowering_gm107.h"
26
27 namespace nv50_ir {
28
getTargetGM107(unsigned int chipset)29 Target *getTargetGM107(unsigned int chipset)
30 {
31 return new TargetGM107(chipset);
32 }
33
// BUILTINS / LIBRARY FUNCTIONS:
35
// laziness -> will just hardcode everything for the time being
37
38 #include "lib/gm107.asm.h"
39
40 void
getBuiltinCode(const uint32_t ** code,uint32_t * size) const41 TargetGM107::getBuiltinCode(const uint32_t **code, uint32_t *size) const
42 {
43 *code = (const uint32_t *)&gm107_builtin_code[0];
44 *size = sizeof(gm107_builtin_code);
45 }
46
47 uint32_t
getBuiltinOffset(int builtin) const48 TargetGM107::getBuiltinOffset(int builtin) const
49 {
50 assert(builtin < NVC0_BUILTIN_COUNT);
51 return gm107_builtin_offsets[builtin];
52 }
53
54 bool
isOpSupported(operation op,DataType ty) const55 TargetGM107::isOpSupported(operation op, DataType ty) const
56 {
57 switch (op) {
58 case OP_SAD:
59 case OP_POW:
60 case OP_DIV:
61 case OP_MOD:
62 return false;
63 case OP_SQRT:
64 if (ty == TYPE_F64)
65 return false;
66 return chipset >= NVISA_GM200_CHIPSET;
67 case OP_XMAD:
68 if (isFloatType(ty))
69 return false;
70 break;
71 default:
72 break;
73 }
74
75 return true;
76 }
77
78 // Return true when an instruction supports the reuse flag. When supported, the
79 // hardware will use the operand reuse cache introduced since Maxwell, which
80 // should try to reduce bank conflicts by caching values for the subsequent
81 // instructions. Note that the next instructions have to use the same GPR id in
82 // the same operand slot.
83 bool
isReuseSupported(const Instruction * insn) const84 TargetGM107::isReuseSupported(const Instruction *insn) const
85 {
86 const OpClass cl = getOpClass(insn->op);
87
88 // TODO: double-check!
89 switch (cl) {
90 case OPCLASS_ARITH:
91 case OPCLASS_COMPARE:
92 case OPCLASS_LOGIC:
93 case OPCLASS_MOVE:
94 case OPCLASS_SHIFT:
95 return true;
96 case OPCLASS_BITFIELD:
97 if (insn->op == OP_INSBF || insn->op == OP_EXTBF)
98 return true;
99 break;
100 default:
101 break;
102 }
103 return false;
104 }
105
106 // Return true when an instruction requires to set up a barrier because it
107 // doesn't operate at a fixed latency. Variable latency instructions are memory
108 // operations, double precision operations, special function unit operations
109 // and other low throughput instructions.
110 bool
isBarrierRequired(const Instruction * insn) const111 TargetGM107::isBarrierRequired(const Instruction *insn) const
112 {
113 const OpClass cl = getOpClass(insn->op);
114
115 if (insn->dType == TYPE_F64 || insn->sType == TYPE_F64)
116 return true;
117
118 switch (cl) {
119 case OPCLASS_ATOMIC:
120 case OPCLASS_LOAD:
121 case OPCLASS_STORE:
122 case OPCLASS_SURFACE:
123 case OPCLASS_TEXTURE:
124 return true;
125 case OPCLASS_SFU:
126 switch (insn->op) {
127 case OP_COS:
128 case OP_EX2:
129 case OP_LG2:
130 case OP_LINTERP:
131 case OP_PINTERP:
132 case OP_RCP:
133 case OP_RSQ:
134 case OP_SIN:
135 case OP_SQRT:
136 return true;
137 default:
138 break;
139 }
140 break;
141 case OPCLASS_BITFIELD:
142 switch (insn->op) {
143 case OP_BFIND:
144 case OP_POPCNT:
145 return true;
146 default:
147 break;
148 }
149 break;
150 case OPCLASS_CONTROL:
151 switch (insn->op) {
152 case OP_EMIT:
153 case OP_RESTART:
154 return true;
155 default:
156 break;
157 }
158 break;
159 case OPCLASS_OTHER:
160 switch (insn->op) {
161 case OP_AFETCH:
162 case OP_PFETCH:
163 case OP_PIXLD:
164 case OP_SHFL:
165 return true;
166 case OP_RDSV:
167 return !isCS2RSV(insn->getSrc(0)->reg.data.sv.sv);
168 default:
169 break;
170 }
171 break;
172 case OPCLASS_ARITH:
173 if ((insn->op == OP_MUL || insn->op == OP_MAD) &&
174 !isFloatType(insn->dType))
175 return true;
176 break;
177 case OPCLASS_CONVERT:
178 if (insn->def(0).getFile() != FILE_PREDICATE &&
179 insn->src(0).getFile() != FILE_PREDICATE)
180 return true;
181 break;
182 default:
183 break;
184 }
185 return false;
186 }
187
188 bool
canDualIssue(const Instruction * a,const Instruction * b) const189 TargetGM107::canDualIssue(const Instruction *a, const Instruction *b) const
190 {
191 // TODO
192 return false;
193 }
194
195 // Return the number of stall counts needed to complete a single instruction.
196 // On Maxwell GPUs, the pipeline depth is 6, but some instructions require
197 // different number of stall counts like memory operations.
198 int
getLatency(const Instruction * insn) const199 TargetGM107::getLatency(const Instruction *insn) const
200 {
201 // TODO: better values! This should be good enough for now though.
202 switch (insn->op) {
203 case OP_EMIT:
204 case OP_EXPORT:
205 case OP_PIXLD:
206 case OP_RESTART:
207 case OP_STORE:
208 case OP_SUSTB:
209 case OP_SUSTP:
210 return 1;
211 case OP_SHFL:
212 return 2;
213 case OP_ADD:
214 case OP_AND:
215 case OP_EXTBF:
216 case OP_FMA:
217 case OP_INSBF:
218 case OP_MAD:
219 case OP_MAX:
220 case OP_MIN:
221 case OP_MOV:
222 case OP_MUL:
223 case OP_NOT:
224 case OP_OR:
225 case OP_PREEX2:
226 case OP_PRESIN:
227 case OP_QUADOP:
228 case OP_SELP:
229 case OP_SET:
230 case OP_SET_AND:
231 case OP_SET_OR:
232 case OP_SET_XOR:
233 case OP_SHL:
234 case OP_SHLADD:
235 case OP_SHR:
236 case OP_SLCT:
237 case OP_SUB:
238 case OP_VOTE:
239 case OP_XOR:
240 case OP_XMAD:
241 if (insn->dType != TYPE_F64)
242 return 6;
243 break;
244 case OP_RDSV:
245 return isCS2RSV(insn->getSrc(0)->reg.data.sv.sv) ? 6 : 15;
246 case OP_ABS:
247 case OP_CEIL:
248 case OP_CVT:
249 case OP_FLOOR:
250 case OP_NEG:
251 case OP_SAT:
252 case OP_TRUNC:
253 if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
254 insn->src(0).getFile() == FILE_PREDICATE))
255 return 6;
256 break;
257 case OP_BFIND:
258 case OP_COS:
259 case OP_EX2:
260 case OP_LG2:
261 case OP_POPCNT:
262 case OP_QUADON:
263 case OP_QUADPOP:
264 case OP_RCP:
265 case OP_RSQ:
266 case OP_SIN:
267 case OP_SQRT:
268 return 13;
269 default:
270 break;
271 }
272 // Use the maximum number of stall counts for other instructions.
273 return 15;
274 }
275
276 // Return the operand read latency which is the number of stall counts before
277 // an instruction can read its sources. For memory operations like ATOM, LOAD
278 // and STORE, the memory access has to be indirect.
279 int
getReadLatency(const Instruction * insn) const280 TargetGM107::getReadLatency(const Instruction *insn) const
281 {
282 switch (insn->op) {
283 case OP_ABS:
284 case OP_BFIND:
285 case OP_CEIL:
286 case OP_COS:
287 case OP_EX2:
288 case OP_FLOOR:
289 case OP_LG2:
290 case OP_NEG:
291 case OP_POPCNT:
292 case OP_RCP:
293 case OP_RSQ:
294 case OP_SAT:
295 case OP_SIN:
296 case OP_SQRT:
297 case OP_SULDB:
298 case OP_SULDP:
299 case OP_SUREDB:
300 case OP_SUREDP:
301 case OP_SUSTB:
302 case OP_SUSTP:
303 case OP_TRUNC:
304 return 4;
305 case OP_CVT:
306 if (insn->def(0).getFile() != FILE_PREDICATE &&
307 insn->src(0).getFile() != FILE_PREDICATE)
308 return 4;
309 break;
310 case OP_ATOM:
311 case OP_LOAD:
312 case OP_STORE:
313 if (insn->src(0).isIndirect(0)) {
314 switch (insn->src(0).getFile()) {
315 case FILE_MEMORY_SHARED:
316 case FILE_MEMORY_CONST:
317 return 2;
318 case FILE_MEMORY_GLOBAL:
319 case FILE_MEMORY_LOCAL:
320 return 4;
321 default:
322 break;
323 }
324 }
325 break;
326 case OP_EXPORT:
327 case OP_PFETCH:
328 case OP_SHFL:
329 case OP_VFETCH:
330 return 2;
331 default:
332 break;
333 }
334 return 0;
335 }
336
337 bool
isCS2RSV(SVSemantic sv) const338 TargetGM107::isCS2RSV(SVSemantic sv) const
339 {
340 return sv == SV_CLOCK;
341 }
342
343 bool
runLegalizePass(Program * prog,CGStage stage) const344 TargetGM107::runLegalizePass(Program *prog, CGStage stage) const
345 {
346 if (stage == CG_STAGE_PRE_SSA) {
347 GM107LoweringPass pass(prog);
348 return pass.run(prog, false, true);
349 } else
350 if (stage == CG_STAGE_POST_RA) {
351 NVC0LegalizePostRA pass(prog);
352 return pass.run(prog, false, true);
353 } else
354 if (stage == CG_STAGE_SSA) {
355 GM107LegalizeSSA pass;
356 return pass.run(prog, false, true);
357 }
358 return false;
359 }
360
361 CodeEmitter *
getCodeEmitter(Program::Type type)362 TargetGM107::getCodeEmitter(Program::Type type)
363 {
364 return createCodeEmitterGM107(type);
365 }
366
367 } // namespace nv50_ir
368