1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines an instruction selector for the NVPTX target.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "NVPTXISelDAGToDAG.h"
15 #include "NVPTXUtilities.h"
16 #include "llvm/Analysis/ValueTracking.h"
17 #include "llvm/IR/GlobalValue.h"
18 #include "llvm/IR/Instructions.h"
19 #include "llvm/Support/CommandLine.h"
20 #include "llvm/Support/Debug.h"
21 #include "llvm/Support/ErrorHandling.h"
22 #include "llvm/Support/raw_ostream.h"
23 #include "llvm/Target/TargetIntrinsicInfo.h"
24
25 using namespace llvm;
26
27 #define DEBUG_TYPE "nvptx-isel"
28
29 static cl::opt<int> UsePrecDivF32(
30 "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
31 cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
32 " IEEE Compliant F32 div.rnd if available."),
33 cl::init(2));
34
35 static cl::opt<bool>
36 UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
37 cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
38 cl::init(true));
39
40 static cl::opt<bool>
41 FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
42 cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
43 cl::init(false));
44
45
46 /// createNVPTXISelDag - This pass converts a legalized DAG into a
47 /// NVPTX-specific DAG, ready for instruction scheduling.
createNVPTXISelDag(NVPTXTargetMachine & TM,llvm::CodeGenOpt::Level OptLevel)48 FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
49 llvm::CodeGenOpt::Level OptLevel) {
50 return new NVPTXDAGToDAGISel(TM, OptLevel);
51 }
52
NVPTXDAGToDAGISel(NVPTXTargetMachine & tm,CodeGenOpt::Level OptLevel)53 NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
54 CodeGenOpt::Level OptLevel)
55 : SelectionDAGISel(tm, OptLevel), TM(tm) {
56 doMulWide = (OptLevel > 0);
57 }
58
runOnMachineFunction(MachineFunction & MF)59 bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
60 Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
61 return SelectionDAGISel::runOnMachineFunction(MF);
62 }
63
getDivF32Level() const64 int NVPTXDAGToDAGISel::getDivF32Level() const {
65 if (UsePrecDivF32.getNumOccurrences() > 0) {
66 // If nvptx-prec-div32=N is used on the command-line, always honor it
67 return UsePrecDivF32;
68 } else {
69 // Otherwise, use div.approx if fast math is enabled
70 if (TM.Options.UnsafeFPMath)
71 return 0;
72 else
73 return 2;
74 }
75 }
76
usePrecSqrtF32() const77 bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
78 if (UsePrecSqrtF32.getNumOccurrences() > 0) {
79 // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
80 return UsePrecSqrtF32;
81 } else {
82 // Otherwise, use sqrt.approx if fast math is enabled
83 return !TM.Options.UnsafeFPMath;
84 }
85 }
86
useF32FTZ() const87 bool NVPTXDAGToDAGISel::useF32FTZ() const {
88 if (FtzEnabled.getNumOccurrences() > 0) {
89 // If nvptx-f32ftz is used on the command-line, always honor it
90 return FtzEnabled;
91 } else {
92 const Function *F = MF->getFunction();
93 // Otherwise, check for an nvptx-f32ftz attribute on the function
94 if (F->hasFnAttribute("nvptx-f32ftz"))
95 return F->getFnAttribute("nvptx-f32ftz").getValueAsString() == "true";
96 else
97 return false;
98 }
99 }
100
allowFMA() const101 bool NVPTXDAGToDAGISel::allowFMA() const {
102 const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
103 return TL->allowFMA(*MF, OptLevel);
104 }
105
106 /// Select - Select instructions not customized! Used for
107 /// expanded, promoted and normal instructions.
Select(SDNode * N)108 void NVPTXDAGToDAGISel::Select(SDNode *N) {
109
110 if (N->isMachineOpcode()) {
111 N->setNodeId(-1);
112 return; // Already selected.
113 }
114
115 switch (N->getOpcode()) {
116 case ISD::LOAD:
117 if (tryLoad(N))
118 return;
119 break;
120 case ISD::STORE:
121 if (tryStore(N))
122 return;
123 break;
124 case NVPTXISD::LoadV2:
125 case NVPTXISD::LoadV4:
126 if (tryLoadVector(N))
127 return;
128 break;
129 case NVPTXISD::LDGV2:
130 case NVPTXISD::LDGV4:
131 case NVPTXISD::LDUV2:
132 case NVPTXISD::LDUV4:
133 if (tryLDGLDU(N))
134 return;
135 break;
136 case NVPTXISD::StoreV2:
137 case NVPTXISD::StoreV4:
138 if (tryStoreVector(N))
139 return;
140 break;
141 case NVPTXISD::LoadParam:
142 case NVPTXISD::LoadParamV2:
143 case NVPTXISD::LoadParamV4:
144 if (tryLoadParam(N))
145 return;
146 break;
147 case NVPTXISD::StoreRetval:
148 case NVPTXISD::StoreRetvalV2:
149 case NVPTXISD::StoreRetvalV4:
150 if (tryStoreRetval(N))
151 return;
152 break;
153 case NVPTXISD::StoreParam:
154 case NVPTXISD::StoreParamV2:
155 case NVPTXISD::StoreParamV4:
156 case NVPTXISD::StoreParamS32:
157 case NVPTXISD::StoreParamU32:
158 if (tryStoreParam(N))
159 return;
160 break;
161 case ISD::INTRINSIC_WO_CHAIN:
162 if (tryIntrinsicNoChain(N))
163 return;
164 break;
165 case ISD::INTRINSIC_W_CHAIN:
166 if (tryIntrinsicChain(N))
167 return;
168 break;
169 case NVPTXISD::Tex1DFloatS32:
170 case NVPTXISD::Tex1DFloatFloat:
171 case NVPTXISD::Tex1DFloatFloatLevel:
172 case NVPTXISD::Tex1DFloatFloatGrad:
173 case NVPTXISD::Tex1DS32S32:
174 case NVPTXISD::Tex1DS32Float:
175 case NVPTXISD::Tex1DS32FloatLevel:
176 case NVPTXISD::Tex1DS32FloatGrad:
177 case NVPTXISD::Tex1DU32S32:
178 case NVPTXISD::Tex1DU32Float:
179 case NVPTXISD::Tex1DU32FloatLevel:
180 case NVPTXISD::Tex1DU32FloatGrad:
181 case NVPTXISD::Tex1DArrayFloatS32:
182 case NVPTXISD::Tex1DArrayFloatFloat:
183 case NVPTXISD::Tex1DArrayFloatFloatLevel:
184 case NVPTXISD::Tex1DArrayFloatFloatGrad:
185 case NVPTXISD::Tex1DArrayS32S32:
186 case NVPTXISD::Tex1DArrayS32Float:
187 case NVPTXISD::Tex1DArrayS32FloatLevel:
188 case NVPTXISD::Tex1DArrayS32FloatGrad:
189 case NVPTXISD::Tex1DArrayU32S32:
190 case NVPTXISD::Tex1DArrayU32Float:
191 case NVPTXISD::Tex1DArrayU32FloatLevel:
192 case NVPTXISD::Tex1DArrayU32FloatGrad:
193 case NVPTXISD::Tex2DFloatS32:
194 case NVPTXISD::Tex2DFloatFloat:
195 case NVPTXISD::Tex2DFloatFloatLevel:
196 case NVPTXISD::Tex2DFloatFloatGrad:
197 case NVPTXISD::Tex2DS32S32:
198 case NVPTXISD::Tex2DS32Float:
199 case NVPTXISD::Tex2DS32FloatLevel:
200 case NVPTXISD::Tex2DS32FloatGrad:
201 case NVPTXISD::Tex2DU32S32:
202 case NVPTXISD::Tex2DU32Float:
203 case NVPTXISD::Tex2DU32FloatLevel:
204 case NVPTXISD::Tex2DU32FloatGrad:
205 case NVPTXISD::Tex2DArrayFloatS32:
206 case NVPTXISD::Tex2DArrayFloatFloat:
207 case NVPTXISD::Tex2DArrayFloatFloatLevel:
208 case NVPTXISD::Tex2DArrayFloatFloatGrad:
209 case NVPTXISD::Tex2DArrayS32S32:
210 case NVPTXISD::Tex2DArrayS32Float:
211 case NVPTXISD::Tex2DArrayS32FloatLevel:
212 case NVPTXISD::Tex2DArrayS32FloatGrad:
213 case NVPTXISD::Tex2DArrayU32S32:
214 case NVPTXISD::Tex2DArrayU32Float:
215 case NVPTXISD::Tex2DArrayU32FloatLevel:
216 case NVPTXISD::Tex2DArrayU32FloatGrad:
217 case NVPTXISD::Tex3DFloatS32:
218 case NVPTXISD::Tex3DFloatFloat:
219 case NVPTXISD::Tex3DFloatFloatLevel:
220 case NVPTXISD::Tex3DFloatFloatGrad:
221 case NVPTXISD::Tex3DS32S32:
222 case NVPTXISD::Tex3DS32Float:
223 case NVPTXISD::Tex3DS32FloatLevel:
224 case NVPTXISD::Tex3DS32FloatGrad:
225 case NVPTXISD::Tex3DU32S32:
226 case NVPTXISD::Tex3DU32Float:
227 case NVPTXISD::Tex3DU32FloatLevel:
228 case NVPTXISD::Tex3DU32FloatGrad:
229 case NVPTXISD::TexCubeFloatFloat:
230 case NVPTXISD::TexCubeFloatFloatLevel:
231 case NVPTXISD::TexCubeS32Float:
232 case NVPTXISD::TexCubeS32FloatLevel:
233 case NVPTXISD::TexCubeU32Float:
234 case NVPTXISD::TexCubeU32FloatLevel:
235 case NVPTXISD::TexCubeArrayFloatFloat:
236 case NVPTXISD::TexCubeArrayFloatFloatLevel:
237 case NVPTXISD::TexCubeArrayS32Float:
238 case NVPTXISD::TexCubeArrayS32FloatLevel:
239 case NVPTXISD::TexCubeArrayU32Float:
240 case NVPTXISD::TexCubeArrayU32FloatLevel:
241 case NVPTXISD::Tld4R2DFloatFloat:
242 case NVPTXISD::Tld4G2DFloatFloat:
243 case NVPTXISD::Tld4B2DFloatFloat:
244 case NVPTXISD::Tld4A2DFloatFloat:
245 case NVPTXISD::Tld4R2DS64Float:
246 case NVPTXISD::Tld4G2DS64Float:
247 case NVPTXISD::Tld4B2DS64Float:
248 case NVPTXISD::Tld4A2DS64Float:
249 case NVPTXISD::Tld4R2DU64Float:
250 case NVPTXISD::Tld4G2DU64Float:
251 case NVPTXISD::Tld4B2DU64Float:
252 case NVPTXISD::Tld4A2DU64Float:
253 case NVPTXISD::TexUnified1DFloatS32:
254 case NVPTXISD::TexUnified1DFloatFloat:
255 case NVPTXISD::TexUnified1DFloatFloatLevel:
256 case NVPTXISD::TexUnified1DFloatFloatGrad:
257 case NVPTXISD::TexUnified1DS32S32:
258 case NVPTXISD::TexUnified1DS32Float:
259 case NVPTXISD::TexUnified1DS32FloatLevel:
260 case NVPTXISD::TexUnified1DS32FloatGrad:
261 case NVPTXISD::TexUnified1DU32S32:
262 case NVPTXISD::TexUnified1DU32Float:
263 case NVPTXISD::TexUnified1DU32FloatLevel:
264 case NVPTXISD::TexUnified1DU32FloatGrad:
265 case NVPTXISD::TexUnified1DArrayFloatS32:
266 case NVPTXISD::TexUnified1DArrayFloatFloat:
267 case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
268 case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
269 case NVPTXISD::TexUnified1DArrayS32S32:
270 case NVPTXISD::TexUnified1DArrayS32Float:
271 case NVPTXISD::TexUnified1DArrayS32FloatLevel:
272 case NVPTXISD::TexUnified1DArrayS32FloatGrad:
273 case NVPTXISD::TexUnified1DArrayU32S32:
274 case NVPTXISD::TexUnified1DArrayU32Float:
275 case NVPTXISD::TexUnified1DArrayU32FloatLevel:
276 case NVPTXISD::TexUnified1DArrayU32FloatGrad:
277 case NVPTXISD::TexUnified2DFloatS32:
278 case NVPTXISD::TexUnified2DFloatFloat:
279 case NVPTXISD::TexUnified2DFloatFloatLevel:
280 case NVPTXISD::TexUnified2DFloatFloatGrad:
281 case NVPTXISD::TexUnified2DS32S32:
282 case NVPTXISD::TexUnified2DS32Float:
283 case NVPTXISD::TexUnified2DS32FloatLevel:
284 case NVPTXISD::TexUnified2DS32FloatGrad:
285 case NVPTXISD::TexUnified2DU32S32:
286 case NVPTXISD::TexUnified2DU32Float:
287 case NVPTXISD::TexUnified2DU32FloatLevel:
288 case NVPTXISD::TexUnified2DU32FloatGrad:
289 case NVPTXISD::TexUnified2DArrayFloatS32:
290 case NVPTXISD::TexUnified2DArrayFloatFloat:
291 case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
292 case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
293 case NVPTXISD::TexUnified2DArrayS32S32:
294 case NVPTXISD::TexUnified2DArrayS32Float:
295 case NVPTXISD::TexUnified2DArrayS32FloatLevel:
296 case NVPTXISD::TexUnified2DArrayS32FloatGrad:
297 case NVPTXISD::TexUnified2DArrayU32S32:
298 case NVPTXISD::TexUnified2DArrayU32Float:
299 case NVPTXISD::TexUnified2DArrayU32FloatLevel:
300 case NVPTXISD::TexUnified2DArrayU32FloatGrad:
301 case NVPTXISD::TexUnified3DFloatS32:
302 case NVPTXISD::TexUnified3DFloatFloat:
303 case NVPTXISD::TexUnified3DFloatFloatLevel:
304 case NVPTXISD::TexUnified3DFloatFloatGrad:
305 case NVPTXISD::TexUnified3DS32S32:
306 case NVPTXISD::TexUnified3DS32Float:
307 case NVPTXISD::TexUnified3DS32FloatLevel:
308 case NVPTXISD::TexUnified3DS32FloatGrad:
309 case NVPTXISD::TexUnified3DU32S32:
310 case NVPTXISD::TexUnified3DU32Float:
311 case NVPTXISD::TexUnified3DU32FloatLevel:
312 case NVPTXISD::TexUnified3DU32FloatGrad:
313 case NVPTXISD::TexUnifiedCubeFloatFloat:
314 case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
315 case NVPTXISD::TexUnifiedCubeS32Float:
316 case NVPTXISD::TexUnifiedCubeS32FloatLevel:
317 case NVPTXISD::TexUnifiedCubeU32Float:
318 case NVPTXISD::TexUnifiedCubeU32FloatLevel:
319 case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
320 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
321 case NVPTXISD::TexUnifiedCubeArrayS32Float:
322 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
323 case NVPTXISD::TexUnifiedCubeArrayU32Float:
324 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
325 case NVPTXISD::Tld4UnifiedR2DFloatFloat:
326 case NVPTXISD::Tld4UnifiedG2DFloatFloat:
327 case NVPTXISD::Tld4UnifiedB2DFloatFloat:
328 case NVPTXISD::Tld4UnifiedA2DFloatFloat:
329 case NVPTXISD::Tld4UnifiedR2DS64Float:
330 case NVPTXISD::Tld4UnifiedG2DS64Float:
331 case NVPTXISD::Tld4UnifiedB2DS64Float:
332 case NVPTXISD::Tld4UnifiedA2DS64Float:
333 case NVPTXISD::Tld4UnifiedR2DU64Float:
334 case NVPTXISD::Tld4UnifiedG2DU64Float:
335 case NVPTXISD::Tld4UnifiedB2DU64Float:
336 case NVPTXISD::Tld4UnifiedA2DU64Float:
337 if (tryTextureIntrinsic(N))
338 return;
339 break;
340 case NVPTXISD::Suld1DI8Clamp:
341 case NVPTXISD::Suld1DI16Clamp:
342 case NVPTXISD::Suld1DI32Clamp:
343 case NVPTXISD::Suld1DI64Clamp:
344 case NVPTXISD::Suld1DV2I8Clamp:
345 case NVPTXISD::Suld1DV2I16Clamp:
346 case NVPTXISD::Suld1DV2I32Clamp:
347 case NVPTXISD::Suld1DV2I64Clamp:
348 case NVPTXISD::Suld1DV4I8Clamp:
349 case NVPTXISD::Suld1DV4I16Clamp:
350 case NVPTXISD::Suld1DV4I32Clamp:
351 case NVPTXISD::Suld1DArrayI8Clamp:
352 case NVPTXISD::Suld1DArrayI16Clamp:
353 case NVPTXISD::Suld1DArrayI32Clamp:
354 case NVPTXISD::Suld1DArrayI64Clamp:
355 case NVPTXISD::Suld1DArrayV2I8Clamp:
356 case NVPTXISD::Suld1DArrayV2I16Clamp:
357 case NVPTXISD::Suld1DArrayV2I32Clamp:
358 case NVPTXISD::Suld1DArrayV2I64Clamp:
359 case NVPTXISD::Suld1DArrayV4I8Clamp:
360 case NVPTXISD::Suld1DArrayV4I16Clamp:
361 case NVPTXISD::Suld1DArrayV4I32Clamp:
362 case NVPTXISD::Suld2DI8Clamp:
363 case NVPTXISD::Suld2DI16Clamp:
364 case NVPTXISD::Suld2DI32Clamp:
365 case NVPTXISD::Suld2DI64Clamp:
366 case NVPTXISD::Suld2DV2I8Clamp:
367 case NVPTXISD::Suld2DV2I16Clamp:
368 case NVPTXISD::Suld2DV2I32Clamp:
369 case NVPTXISD::Suld2DV2I64Clamp:
370 case NVPTXISD::Suld2DV4I8Clamp:
371 case NVPTXISD::Suld2DV4I16Clamp:
372 case NVPTXISD::Suld2DV4I32Clamp:
373 case NVPTXISD::Suld2DArrayI8Clamp:
374 case NVPTXISD::Suld2DArrayI16Clamp:
375 case NVPTXISD::Suld2DArrayI32Clamp:
376 case NVPTXISD::Suld2DArrayI64Clamp:
377 case NVPTXISD::Suld2DArrayV2I8Clamp:
378 case NVPTXISD::Suld2DArrayV2I16Clamp:
379 case NVPTXISD::Suld2DArrayV2I32Clamp:
380 case NVPTXISD::Suld2DArrayV2I64Clamp:
381 case NVPTXISD::Suld2DArrayV4I8Clamp:
382 case NVPTXISD::Suld2DArrayV4I16Clamp:
383 case NVPTXISD::Suld2DArrayV4I32Clamp:
384 case NVPTXISD::Suld3DI8Clamp:
385 case NVPTXISD::Suld3DI16Clamp:
386 case NVPTXISD::Suld3DI32Clamp:
387 case NVPTXISD::Suld3DI64Clamp:
388 case NVPTXISD::Suld3DV2I8Clamp:
389 case NVPTXISD::Suld3DV2I16Clamp:
390 case NVPTXISD::Suld3DV2I32Clamp:
391 case NVPTXISD::Suld3DV2I64Clamp:
392 case NVPTXISD::Suld3DV4I8Clamp:
393 case NVPTXISD::Suld3DV4I16Clamp:
394 case NVPTXISD::Suld3DV4I32Clamp:
395 case NVPTXISD::Suld1DI8Trap:
396 case NVPTXISD::Suld1DI16Trap:
397 case NVPTXISD::Suld1DI32Trap:
398 case NVPTXISD::Suld1DI64Trap:
399 case NVPTXISD::Suld1DV2I8Trap:
400 case NVPTXISD::Suld1DV2I16Trap:
401 case NVPTXISD::Suld1DV2I32Trap:
402 case NVPTXISD::Suld1DV2I64Trap:
403 case NVPTXISD::Suld1DV4I8Trap:
404 case NVPTXISD::Suld1DV4I16Trap:
405 case NVPTXISD::Suld1DV4I32Trap:
406 case NVPTXISD::Suld1DArrayI8Trap:
407 case NVPTXISD::Suld1DArrayI16Trap:
408 case NVPTXISD::Suld1DArrayI32Trap:
409 case NVPTXISD::Suld1DArrayI64Trap:
410 case NVPTXISD::Suld1DArrayV2I8Trap:
411 case NVPTXISD::Suld1DArrayV2I16Trap:
412 case NVPTXISD::Suld1DArrayV2I32Trap:
413 case NVPTXISD::Suld1DArrayV2I64Trap:
414 case NVPTXISD::Suld1DArrayV4I8Trap:
415 case NVPTXISD::Suld1DArrayV4I16Trap:
416 case NVPTXISD::Suld1DArrayV4I32Trap:
417 case NVPTXISD::Suld2DI8Trap:
418 case NVPTXISD::Suld2DI16Trap:
419 case NVPTXISD::Suld2DI32Trap:
420 case NVPTXISD::Suld2DI64Trap:
421 case NVPTXISD::Suld2DV2I8Trap:
422 case NVPTXISD::Suld2DV2I16Trap:
423 case NVPTXISD::Suld2DV2I32Trap:
424 case NVPTXISD::Suld2DV2I64Trap:
425 case NVPTXISD::Suld2DV4I8Trap:
426 case NVPTXISD::Suld2DV4I16Trap:
427 case NVPTXISD::Suld2DV4I32Trap:
428 case NVPTXISD::Suld2DArrayI8Trap:
429 case NVPTXISD::Suld2DArrayI16Trap:
430 case NVPTXISD::Suld2DArrayI32Trap:
431 case NVPTXISD::Suld2DArrayI64Trap:
432 case NVPTXISD::Suld2DArrayV2I8Trap:
433 case NVPTXISD::Suld2DArrayV2I16Trap:
434 case NVPTXISD::Suld2DArrayV2I32Trap:
435 case NVPTXISD::Suld2DArrayV2I64Trap:
436 case NVPTXISD::Suld2DArrayV4I8Trap:
437 case NVPTXISD::Suld2DArrayV4I16Trap:
438 case NVPTXISD::Suld2DArrayV4I32Trap:
439 case NVPTXISD::Suld3DI8Trap:
440 case NVPTXISD::Suld3DI16Trap:
441 case NVPTXISD::Suld3DI32Trap:
442 case NVPTXISD::Suld3DI64Trap:
443 case NVPTXISD::Suld3DV2I8Trap:
444 case NVPTXISD::Suld3DV2I16Trap:
445 case NVPTXISD::Suld3DV2I32Trap:
446 case NVPTXISD::Suld3DV2I64Trap:
447 case NVPTXISD::Suld3DV4I8Trap:
448 case NVPTXISD::Suld3DV4I16Trap:
449 case NVPTXISD::Suld3DV4I32Trap:
450 case NVPTXISD::Suld1DI8Zero:
451 case NVPTXISD::Suld1DI16Zero:
452 case NVPTXISD::Suld1DI32Zero:
453 case NVPTXISD::Suld1DI64Zero:
454 case NVPTXISD::Suld1DV2I8Zero:
455 case NVPTXISD::Suld1DV2I16Zero:
456 case NVPTXISD::Suld1DV2I32Zero:
457 case NVPTXISD::Suld1DV2I64Zero:
458 case NVPTXISD::Suld1DV4I8Zero:
459 case NVPTXISD::Suld1DV4I16Zero:
460 case NVPTXISD::Suld1DV4I32Zero:
461 case NVPTXISD::Suld1DArrayI8Zero:
462 case NVPTXISD::Suld1DArrayI16Zero:
463 case NVPTXISD::Suld1DArrayI32Zero:
464 case NVPTXISD::Suld1DArrayI64Zero:
465 case NVPTXISD::Suld1DArrayV2I8Zero:
466 case NVPTXISD::Suld1DArrayV2I16Zero:
467 case NVPTXISD::Suld1DArrayV2I32Zero:
468 case NVPTXISD::Suld1DArrayV2I64Zero:
469 case NVPTXISD::Suld1DArrayV4I8Zero:
470 case NVPTXISD::Suld1DArrayV4I16Zero:
471 case NVPTXISD::Suld1DArrayV4I32Zero:
472 case NVPTXISD::Suld2DI8Zero:
473 case NVPTXISD::Suld2DI16Zero:
474 case NVPTXISD::Suld2DI32Zero:
475 case NVPTXISD::Suld2DI64Zero:
476 case NVPTXISD::Suld2DV2I8Zero:
477 case NVPTXISD::Suld2DV2I16Zero:
478 case NVPTXISD::Suld2DV2I32Zero:
479 case NVPTXISD::Suld2DV2I64Zero:
480 case NVPTXISD::Suld2DV4I8Zero:
481 case NVPTXISD::Suld2DV4I16Zero:
482 case NVPTXISD::Suld2DV4I32Zero:
483 case NVPTXISD::Suld2DArrayI8Zero:
484 case NVPTXISD::Suld2DArrayI16Zero:
485 case NVPTXISD::Suld2DArrayI32Zero:
486 case NVPTXISD::Suld2DArrayI64Zero:
487 case NVPTXISD::Suld2DArrayV2I8Zero:
488 case NVPTXISD::Suld2DArrayV2I16Zero:
489 case NVPTXISD::Suld2DArrayV2I32Zero:
490 case NVPTXISD::Suld2DArrayV2I64Zero:
491 case NVPTXISD::Suld2DArrayV4I8Zero:
492 case NVPTXISD::Suld2DArrayV4I16Zero:
493 case NVPTXISD::Suld2DArrayV4I32Zero:
494 case NVPTXISD::Suld3DI8Zero:
495 case NVPTXISD::Suld3DI16Zero:
496 case NVPTXISD::Suld3DI32Zero:
497 case NVPTXISD::Suld3DI64Zero:
498 case NVPTXISD::Suld3DV2I8Zero:
499 case NVPTXISD::Suld3DV2I16Zero:
500 case NVPTXISD::Suld3DV2I32Zero:
501 case NVPTXISD::Suld3DV2I64Zero:
502 case NVPTXISD::Suld3DV4I8Zero:
503 case NVPTXISD::Suld3DV4I16Zero:
504 case NVPTXISD::Suld3DV4I32Zero:
505 if (trySurfaceIntrinsic(N))
506 return;
507 break;
508 case ISD::AND:
509 case ISD::SRA:
510 case ISD::SRL:
511 // Try to select BFE
512 if (tryBFE(N))
513 return;
514 break;
515 case ISD::ADDRSPACECAST:
516 SelectAddrSpaceCast(N);
517 return;
518 default:
519 break;
520 }
521 SelectCode(N);
522 }
523
tryIntrinsicChain(SDNode * N)524 bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
525 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
526 switch (IID) {
527 default:
528 return false;
529 case Intrinsic::nvvm_ldg_global_f:
530 case Intrinsic::nvvm_ldg_global_i:
531 case Intrinsic::nvvm_ldg_global_p:
532 case Intrinsic::nvvm_ldu_global_f:
533 case Intrinsic::nvvm_ldu_global_i:
534 case Intrinsic::nvvm_ldu_global_p:
535 return tryLDGLDU(N);
536 }
537 }
538
getCodeAddrSpace(MemSDNode * N)539 static unsigned int getCodeAddrSpace(MemSDNode *N) {
540 const Value *Src = N->getMemOperand()->getValue();
541
542 if (!Src)
543 return NVPTX::PTXLdStInstCode::GENERIC;
544
545 if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
546 switch (PT->getAddressSpace()) {
547 case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
548 case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
549 case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
550 case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
551 case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
552 case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
553 default: break;
554 }
555 }
556 return NVPTX::PTXLdStInstCode::GENERIC;
557 }
558
canLowerToLDG(MemSDNode * N,const NVPTXSubtarget & Subtarget,unsigned CodeAddrSpace,MachineFunction * F)559 static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
560 unsigned CodeAddrSpace, MachineFunction *F) {
561 // To use non-coherent caching, the load has to be from global
562 // memory and we have to prove that the memory area is not written
563 // to anywhere for the duration of the kernel call, not even after
564 // the load.
565 //
566 // To ensure that there are no writes to the memory, we require the
567 // underlying pointer to be a noalias (__restrict) kernel parameter
568 // that is never used for a write. We can only do this for kernel
569 // functions since from within a device function, we cannot know if
570 // there were or will be writes to the memory from the caller - or we
571 // could, but then we would have to do inter-procedural analysis.
572 if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL ||
573 !isKernelFunction(*F->getFunction())) {
574 return false;
575 }
576
577 // We use GetUnderlyingObjects() here instead of
578 // GetUnderlyingObject() mainly because the former looks through phi
579 // nodes while the latter does not. We need to look through phi
580 // nodes to handle pointer induction variables.
581 SmallVector<Value *, 8> Objs;
582 GetUnderlyingObjects(const_cast<Value *>(N->getMemOperand()->getValue()),
583 Objs, F->getDataLayout());
584 for (Value *Obj : Objs) {
585 auto *A = dyn_cast<const Argument>(Obj);
586 if (!A || !A->onlyReadsMemory() || !A->hasNoAliasAttr()) return false;
587 }
588
589 return true;
590 }
591
tryIntrinsicNoChain(SDNode * N)592 bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
593 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
594 switch (IID) {
595 default:
596 return false;
597 case Intrinsic::nvvm_texsurf_handle_internal:
598 SelectTexSurfHandle(N);
599 return true;
600 }
601 }
602
SelectTexSurfHandle(SDNode * N)603 void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
604 // Op 0 is the intrinsic ID
605 SDValue Wrapper = N->getOperand(1);
606 SDValue GlobalVal = Wrapper.getOperand(0);
607 ReplaceNode(N, CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N),
608 MVT::i64, GlobalVal));
609 }
610
SelectAddrSpaceCast(SDNode * N)611 void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
612 SDValue Src = N->getOperand(0);
613 AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
614 unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
615 unsigned DstAddrSpace = CastN->getDestAddressSpace();
616
617 assert(SrcAddrSpace != DstAddrSpace &&
618 "addrspacecast must be between different address spaces");
619
620 if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
621 // Specific to generic
622 unsigned Opc;
623 switch (SrcAddrSpace) {
624 default: report_fatal_error("Bad address space in addrspacecast");
625 case ADDRESS_SPACE_GLOBAL:
626 Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
627 break;
628 case ADDRESS_SPACE_SHARED:
629 Opc = TM.is64Bit() ? NVPTX::cvta_shared_yes_64 : NVPTX::cvta_shared_yes;
630 break;
631 case ADDRESS_SPACE_CONST:
632 Opc = TM.is64Bit() ? NVPTX::cvta_const_yes_64 : NVPTX::cvta_const_yes;
633 break;
634 case ADDRESS_SPACE_LOCAL:
635 Opc = TM.is64Bit() ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes;
636 break;
637 }
638 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
639 Src));
640 return;
641 } else {
642 // Generic to specific
643 if (SrcAddrSpace != 0)
644 report_fatal_error("Cannot cast between two non-generic address spaces");
645 unsigned Opc;
646 switch (DstAddrSpace) {
647 default: report_fatal_error("Bad address space in addrspacecast");
648 case ADDRESS_SPACE_GLOBAL:
649 Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
650 : NVPTX::cvta_to_global_yes;
651 break;
652 case ADDRESS_SPACE_SHARED:
653 Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_yes_64
654 : NVPTX::cvta_to_shared_yes;
655 break;
656 case ADDRESS_SPACE_CONST:
657 Opc =
658 TM.is64Bit() ? NVPTX::cvta_to_const_yes_64 : NVPTX::cvta_to_const_yes;
659 break;
660 case ADDRESS_SPACE_LOCAL:
661 Opc =
662 TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
663 break;
664 case ADDRESS_SPACE_PARAM:
665 Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
666 : NVPTX::nvvm_ptr_gen_to_param;
667 break;
668 }
669 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
670 Src));
671 return;
672 }
673 }
674
tryLoad(SDNode * N)675 bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
676 SDLoc dl(N);
677 LoadSDNode *LD = cast<LoadSDNode>(N);
678 EVT LoadedVT = LD->getMemoryVT();
679 SDNode *NVPTXLD = nullptr;
680
681 // do not support pre/post inc/dec
682 if (LD->isIndexed())
683 return false;
684
685 if (!LoadedVT.isSimple())
686 return false;
687
688 // Address Space Setting
689 unsigned int codeAddrSpace = getCodeAddrSpace(LD);
690
691 if (canLowerToLDG(LD, *Subtarget, codeAddrSpace, MF)) {
692 return tryLDGLDU(N);
693 }
694
695 // Volatile Setting
696 // - .volatile is only availalble for .global and .shared
697 bool isVolatile = LD->isVolatile();
698 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
699 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
700 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
701 isVolatile = false;
702
703 // Vector Setting
704 MVT SimpleVT = LoadedVT.getSimpleVT();
705 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
706 if (SimpleVT.isVector()) {
707 unsigned num = SimpleVT.getVectorNumElements();
708 if (num == 2)
709 vecType = NVPTX::PTXLdStInstCode::V2;
710 else if (num == 4)
711 vecType = NVPTX::PTXLdStInstCode::V4;
712 else
713 return false;
714 }
715
716 // Type Setting: fromType + fromTypeWidth
717 //
718 // Sign : ISD::SEXTLOAD
719 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
720 // type is integer
721 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
722 MVT ScalarVT = SimpleVT.getScalarType();
723 // Read at least 8 bits (predicates are stored as 8-bit values)
724 unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
725 unsigned int fromType;
726 if ((LD->getExtensionType() == ISD::SEXTLOAD))
727 fromType = NVPTX::PTXLdStInstCode::Signed;
728 else if (ScalarVT.isFloatingPoint())
729 fromType = NVPTX::PTXLdStInstCode::Float;
730 else
731 fromType = NVPTX::PTXLdStInstCode::Unsigned;
732
733 // Create the machine instruction DAG
734 SDValue Chain = N->getOperand(0);
735 SDValue N1 = N->getOperand(1);
736 SDValue Addr;
737 SDValue Offset, Base;
738 unsigned Opcode;
739 MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
740
741 if (SelectDirectAddr(N1, Addr)) {
742 switch (TargetVT) {
743 case MVT::i8:
744 Opcode = NVPTX::LD_i8_avar;
745 break;
746 case MVT::i16:
747 Opcode = NVPTX::LD_i16_avar;
748 break;
749 case MVT::i32:
750 Opcode = NVPTX::LD_i32_avar;
751 break;
752 case MVT::i64:
753 Opcode = NVPTX::LD_i64_avar;
754 break;
755 case MVT::f32:
756 Opcode = NVPTX::LD_f32_avar;
757 break;
758 case MVT::f64:
759 Opcode = NVPTX::LD_f64_avar;
760 break;
761 default:
762 return false;
763 }
764 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
765 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
766 getI32Imm(fromTypeWidth, dl), Addr, Chain };
767 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
768 } else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
769 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
770 switch (TargetVT) {
771 case MVT::i8:
772 Opcode = NVPTX::LD_i8_asi;
773 break;
774 case MVT::i16:
775 Opcode = NVPTX::LD_i16_asi;
776 break;
777 case MVT::i32:
778 Opcode = NVPTX::LD_i32_asi;
779 break;
780 case MVT::i64:
781 Opcode = NVPTX::LD_i64_asi;
782 break;
783 case MVT::f32:
784 Opcode = NVPTX::LD_f32_asi;
785 break;
786 case MVT::f64:
787 Opcode = NVPTX::LD_f64_asi;
788 break;
789 default:
790 return false;
791 }
792 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
793 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
794 getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
795 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
796 } else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
797 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
798 if (TM.is64Bit()) {
799 switch (TargetVT) {
800 case MVT::i8:
801 Opcode = NVPTX::LD_i8_ari_64;
802 break;
803 case MVT::i16:
804 Opcode = NVPTX::LD_i16_ari_64;
805 break;
806 case MVT::i32:
807 Opcode = NVPTX::LD_i32_ari_64;
808 break;
809 case MVT::i64:
810 Opcode = NVPTX::LD_i64_ari_64;
811 break;
812 case MVT::f32:
813 Opcode = NVPTX::LD_f32_ari_64;
814 break;
815 case MVT::f64:
816 Opcode = NVPTX::LD_f64_ari_64;
817 break;
818 default:
819 return false;
820 }
821 } else {
822 switch (TargetVT) {
823 case MVT::i8:
824 Opcode = NVPTX::LD_i8_ari;
825 break;
826 case MVT::i16:
827 Opcode = NVPTX::LD_i16_ari;
828 break;
829 case MVT::i32:
830 Opcode = NVPTX::LD_i32_ari;
831 break;
832 case MVT::i64:
833 Opcode = NVPTX::LD_i64_ari;
834 break;
835 case MVT::f32:
836 Opcode = NVPTX::LD_f32_ari;
837 break;
838 case MVT::f64:
839 Opcode = NVPTX::LD_f64_ari;
840 break;
841 default:
842 return false;
843 }
844 }
845 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
846 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
847 getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
848 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
849 } else {
850 if (TM.is64Bit()) {
851 switch (TargetVT) {
852 case MVT::i8:
853 Opcode = NVPTX::LD_i8_areg_64;
854 break;
855 case MVT::i16:
856 Opcode = NVPTX::LD_i16_areg_64;
857 break;
858 case MVT::i32:
859 Opcode = NVPTX::LD_i32_areg_64;
860 break;
861 case MVT::i64:
862 Opcode = NVPTX::LD_i64_areg_64;
863 break;
864 case MVT::f32:
865 Opcode = NVPTX::LD_f32_areg_64;
866 break;
867 case MVT::f64:
868 Opcode = NVPTX::LD_f64_areg_64;
869 break;
870 default:
871 return false;
872 }
873 } else {
874 switch (TargetVT) {
875 case MVT::i8:
876 Opcode = NVPTX::LD_i8_areg;
877 break;
878 case MVT::i16:
879 Opcode = NVPTX::LD_i16_areg;
880 break;
881 case MVT::i32:
882 Opcode = NVPTX::LD_i32_areg;
883 break;
884 case MVT::i64:
885 Opcode = NVPTX::LD_i64_areg;
886 break;
887 case MVT::f32:
888 Opcode = NVPTX::LD_f32_areg;
889 break;
890 case MVT::f64:
891 Opcode = NVPTX::LD_f64_areg;
892 break;
893 default:
894 return false;
895 }
896 }
897 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
898 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
899 getI32Imm(fromTypeWidth, dl), N1, Chain };
900 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
901 }
902
903 if (!NVPTXLD)
904 return false;
905
906 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
907 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
908 cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
909
910 ReplaceNode(N, NVPTXLD);
911 return true;
912 }
913
tryLoadVector(SDNode * N)914 bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
915
916 SDValue Chain = N->getOperand(0);
917 SDValue Op1 = N->getOperand(1);
918 SDValue Addr, Offset, Base;
919 unsigned Opcode;
920 SDLoc DL(N);
921 SDNode *LD;
922 MemSDNode *MemSD = cast<MemSDNode>(N);
923 EVT LoadedVT = MemSD->getMemoryVT();
924
925 if (!LoadedVT.isSimple())
926 return false;
927
928 // Address Space Setting
929 unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
930
931 if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
932 return tryLDGLDU(N);
933 }
934
935 // Volatile Setting
936 // - .volatile is only availalble for .global and .shared
937 bool IsVolatile = MemSD->isVolatile();
938 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
939 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
940 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
941 IsVolatile = false;
942
943 // Vector Setting
944 MVT SimpleVT = LoadedVT.getSimpleVT();
945
946 // Type Setting: fromType + fromTypeWidth
947 //
948 // Sign : ISD::SEXTLOAD
949 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
950 // type is integer
951 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
952 MVT ScalarVT = SimpleVT.getScalarType();
953 // Read at least 8 bits (predicates are stored as 8-bit values)
954 unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
955 unsigned int FromType;
956 // The last operand holds the original LoadSDNode::getExtensionType() value
957 unsigned ExtensionType = cast<ConstantSDNode>(
958 N->getOperand(N->getNumOperands() - 1))->getZExtValue();
959 if (ExtensionType == ISD::SEXTLOAD)
960 FromType = NVPTX::PTXLdStInstCode::Signed;
961 else if (ScalarVT.isFloatingPoint())
962 FromType = NVPTX::PTXLdStInstCode::Float;
963 else
964 FromType = NVPTX::PTXLdStInstCode::Unsigned;
965
966 unsigned VecType;
967
968 switch (N->getOpcode()) {
969 case NVPTXISD::LoadV2:
970 VecType = NVPTX::PTXLdStInstCode::V2;
971 break;
972 case NVPTXISD::LoadV4:
973 VecType = NVPTX::PTXLdStInstCode::V4;
974 break;
975 default:
976 return false;
977 }
978
979 EVT EltVT = N->getValueType(0);
980
981 if (SelectDirectAddr(Op1, Addr)) {
982 switch (N->getOpcode()) {
983 default:
984 return false;
985 case NVPTXISD::LoadV2:
986 switch (EltVT.getSimpleVT().SimpleTy) {
987 default:
988 return false;
989 case MVT::i8:
990 Opcode = NVPTX::LDV_i8_v2_avar;
991 break;
992 case MVT::i16:
993 Opcode = NVPTX::LDV_i16_v2_avar;
994 break;
995 case MVT::i32:
996 Opcode = NVPTX::LDV_i32_v2_avar;
997 break;
998 case MVT::i64:
999 Opcode = NVPTX::LDV_i64_v2_avar;
1000 break;
1001 case MVT::f32:
1002 Opcode = NVPTX::LDV_f32_v2_avar;
1003 break;
1004 case MVT::f64:
1005 Opcode = NVPTX::LDV_f64_v2_avar;
1006 break;
1007 }
1008 break;
1009 case NVPTXISD::LoadV4:
1010 switch (EltVT.getSimpleVT().SimpleTy) {
1011 default:
1012 return false;
1013 case MVT::i8:
1014 Opcode = NVPTX::LDV_i8_v4_avar;
1015 break;
1016 case MVT::i16:
1017 Opcode = NVPTX::LDV_i16_v4_avar;
1018 break;
1019 case MVT::i32:
1020 Opcode = NVPTX::LDV_i32_v4_avar;
1021 break;
1022 case MVT::f32:
1023 Opcode = NVPTX::LDV_f32_v4_avar;
1024 break;
1025 }
1026 break;
1027 }
1028
1029 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1030 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1031 getI32Imm(FromTypeWidth, DL), Addr, Chain };
1032 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1033 } else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
1034 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
1035 switch (N->getOpcode()) {
1036 default:
1037 return false;
1038 case NVPTXISD::LoadV2:
1039 switch (EltVT.getSimpleVT().SimpleTy) {
1040 default:
1041 return false;
1042 case MVT::i8:
1043 Opcode = NVPTX::LDV_i8_v2_asi;
1044 break;
1045 case MVT::i16:
1046 Opcode = NVPTX::LDV_i16_v2_asi;
1047 break;
1048 case MVT::i32:
1049 Opcode = NVPTX::LDV_i32_v2_asi;
1050 break;
1051 case MVT::i64:
1052 Opcode = NVPTX::LDV_i64_v2_asi;
1053 break;
1054 case MVT::f32:
1055 Opcode = NVPTX::LDV_f32_v2_asi;
1056 break;
1057 case MVT::f64:
1058 Opcode = NVPTX::LDV_f64_v2_asi;
1059 break;
1060 }
1061 break;
1062 case NVPTXISD::LoadV4:
1063 switch (EltVT.getSimpleVT().SimpleTy) {
1064 default:
1065 return false;
1066 case MVT::i8:
1067 Opcode = NVPTX::LDV_i8_v4_asi;
1068 break;
1069 case MVT::i16:
1070 Opcode = NVPTX::LDV_i16_v4_asi;
1071 break;
1072 case MVT::i32:
1073 Opcode = NVPTX::LDV_i32_v4_asi;
1074 break;
1075 case MVT::f32:
1076 Opcode = NVPTX::LDV_f32_v4_asi;
1077 break;
1078 }
1079 break;
1080 }
1081
1082 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1083 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1084 getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
1085 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1086 } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1087 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1088 if (TM.is64Bit()) {
1089 switch (N->getOpcode()) {
1090 default:
1091 return false;
1092 case NVPTXISD::LoadV2:
1093 switch (EltVT.getSimpleVT().SimpleTy) {
1094 default:
1095 return false;
1096 case MVT::i8:
1097 Opcode = NVPTX::LDV_i8_v2_ari_64;
1098 break;
1099 case MVT::i16:
1100 Opcode = NVPTX::LDV_i16_v2_ari_64;
1101 break;
1102 case MVT::i32:
1103 Opcode = NVPTX::LDV_i32_v2_ari_64;
1104 break;
1105 case MVT::i64:
1106 Opcode = NVPTX::LDV_i64_v2_ari_64;
1107 break;
1108 case MVT::f32:
1109 Opcode = NVPTX::LDV_f32_v2_ari_64;
1110 break;
1111 case MVT::f64:
1112 Opcode = NVPTX::LDV_f64_v2_ari_64;
1113 break;
1114 }
1115 break;
1116 case NVPTXISD::LoadV4:
1117 switch (EltVT.getSimpleVT().SimpleTy) {
1118 default:
1119 return false;
1120 case MVT::i8:
1121 Opcode = NVPTX::LDV_i8_v4_ari_64;
1122 break;
1123 case MVT::i16:
1124 Opcode = NVPTX::LDV_i16_v4_ari_64;
1125 break;
1126 case MVT::i32:
1127 Opcode = NVPTX::LDV_i32_v4_ari_64;
1128 break;
1129 case MVT::f32:
1130 Opcode = NVPTX::LDV_f32_v4_ari_64;
1131 break;
1132 }
1133 break;
1134 }
1135 } else {
1136 switch (N->getOpcode()) {
1137 default:
1138 return false;
1139 case NVPTXISD::LoadV2:
1140 switch (EltVT.getSimpleVT().SimpleTy) {
1141 default:
1142 return false;
1143 case MVT::i8:
1144 Opcode = NVPTX::LDV_i8_v2_ari;
1145 break;
1146 case MVT::i16:
1147 Opcode = NVPTX::LDV_i16_v2_ari;
1148 break;
1149 case MVT::i32:
1150 Opcode = NVPTX::LDV_i32_v2_ari;
1151 break;
1152 case MVT::i64:
1153 Opcode = NVPTX::LDV_i64_v2_ari;
1154 break;
1155 case MVT::f32:
1156 Opcode = NVPTX::LDV_f32_v2_ari;
1157 break;
1158 case MVT::f64:
1159 Opcode = NVPTX::LDV_f64_v2_ari;
1160 break;
1161 }
1162 break;
1163 case NVPTXISD::LoadV4:
1164 switch (EltVT.getSimpleVT().SimpleTy) {
1165 default:
1166 return false;
1167 case MVT::i8:
1168 Opcode = NVPTX::LDV_i8_v4_ari;
1169 break;
1170 case MVT::i16:
1171 Opcode = NVPTX::LDV_i16_v4_ari;
1172 break;
1173 case MVT::i32:
1174 Opcode = NVPTX::LDV_i32_v4_ari;
1175 break;
1176 case MVT::f32:
1177 Opcode = NVPTX::LDV_f32_v4_ari;
1178 break;
1179 }
1180 break;
1181 }
1182 }
1183
1184 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1185 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1186 getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
1187
1188 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1189 } else {
1190 if (TM.is64Bit()) {
1191 switch (N->getOpcode()) {
1192 default:
1193 return false;
1194 case NVPTXISD::LoadV2:
1195 switch (EltVT.getSimpleVT().SimpleTy) {
1196 default:
1197 return false;
1198 case MVT::i8:
1199 Opcode = NVPTX::LDV_i8_v2_areg_64;
1200 break;
1201 case MVT::i16:
1202 Opcode = NVPTX::LDV_i16_v2_areg_64;
1203 break;
1204 case MVT::i32:
1205 Opcode = NVPTX::LDV_i32_v2_areg_64;
1206 break;
1207 case MVT::i64:
1208 Opcode = NVPTX::LDV_i64_v2_areg_64;
1209 break;
1210 case MVT::f32:
1211 Opcode = NVPTX::LDV_f32_v2_areg_64;
1212 break;
1213 case MVT::f64:
1214 Opcode = NVPTX::LDV_f64_v2_areg_64;
1215 break;
1216 }
1217 break;
1218 case NVPTXISD::LoadV4:
1219 switch (EltVT.getSimpleVT().SimpleTy) {
1220 default:
1221 return false;
1222 case MVT::i8:
1223 Opcode = NVPTX::LDV_i8_v4_areg_64;
1224 break;
1225 case MVT::i16:
1226 Opcode = NVPTX::LDV_i16_v4_areg_64;
1227 break;
1228 case MVT::i32:
1229 Opcode = NVPTX::LDV_i32_v4_areg_64;
1230 break;
1231 case MVT::f32:
1232 Opcode = NVPTX::LDV_f32_v4_areg_64;
1233 break;
1234 }
1235 break;
1236 }
1237 } else {
1238 switch (N->getOpcode()) {
1239 default:
1240 return false;
1241 case NVPTXISD::LoadV2:
1242 switch (EltVT.getSimpleVT().SimpleTy) {
1243 default:
1244 return false;
1245 case MVT::i8:
1246 Opcode = NVPTX::LDV_i8_v2_areg;
1247 break;
1248 case MVT::i16:
1249 Opcode = NVPTX::LDV_i16_v2_areg;
1250 break;
1251 case MVT::i32:
1252 Opcode = NVPTX::LDV_i32_v2_areg;
1253 break;
1254 case MVT::i64:
1255 Opcode = NVPTX::LDV_i64_v2_areg;
1256 break;
1257 case MVT::f32:
1258 Opcode = NVPTX::LDV_f32_v2_areg;
1259 break;
1260 case MVT::f64:
1261 Opcode = NVPTX::LDV_f64_v2_areg;
1262 break;
1263 }
1264 break;
1265 case NVPTXISD::LoadV4:
1266 switch (EltVT.getSimpleVT().SimpleTy) {
1267 default:
1268 return false;
1269 case MVT::i8:
1270 Opcode = NVPTX::LDV_i8_v4_areg;
1271 break;
1272 case MVT::i16:
1273 Opcode = NVPTX::LDV_i16_v4_areg;
1274 break;
1275 case MVT::i32:
1276 Opcode = NVPTX::LDV_i32_v4_areg;
1277 break;
1278 case MVT::f32:
1279 Opcode = NVPTX::LDV_f32_v4_areg;
1280 break;
1281 }
1282 break;
1283 }
1284 }
1285
1286 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1287 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1288 getI32Imm(FromTypeWidth, DL), Op1, Chain };
1289 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1290 }
1291
1292 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1293 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1294 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1295
1296 ReplaceNode(N, LD);
1297 return true;
1298 }
1299
tryLDGLDU(SDNode * N)1300 bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
1301
1302 SDValue Chain = N->getOperand(0);
1303 SDValue Op1;
1304 MemSDNode *Mem;
1305 bool IsLDG = true;
1306
1307 // If this is an LDG intrinsic, the address is the third operand. If its an
1308 // LDG/LDU SD node (from custom vector handling), then its the second operand
1309 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1310 Op1 = N->getOperand(2);
1311 Mem = cast<MemIntrinsicSDNode>(N);
1312 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1313 switch (IID) {
1314 default:
1315 return false;
1316 case Intrinsic::nvvm_ldg_global_f:
1317 case Intrinsic::nvvm_ldg_global_i:
1318 case Intrinsic::nvvm_ldg_global_p:
1319 IsLDG = true;
1320 break;
1321 case Intrinsic::nvvm_ldu_global_f:
1322 case Intrinsic::nvvm_ldu_global_i:
1323 case Intrinsic::nvvm_ldu_global_p:
1324 IsLDG = false;
1325 break;
1326 }
1327 } else {
1328 Op1 = N->getOperand(1);
1329 Mem = cast<MemSDNode>(N);
1330 }
1331
1332 unsigned Opcode;
1333 SDLoc DL(N);
1334 SDNode *LD;
1335 SDValue Base, Offset, Addr;
1336
1337 EVT EltVT = Mem->getMemoryVT();
1338 unsigned NumElts = 1;
1339 if (EltVT.isVector()) {
1340 NumElts = EltVT.getVectorNumElements();
1341 EltVT = EltVT.getVectorElementType();
1342 }
1343
1344 // Build the "promoted" result VTList for the load. If we are really loading
1345 // i8s, then the return type will be promoted to i16 since we do not expose
1346 // 8-bit registers in NVPTX.
1347 EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
1348 SmallVector<EVT, 5> InstVTs;
1349 for (unsigned i = 0; i != NumElts; ++i) {
1350 InstVTs.push_back(NodeVT);
1351 }
1352 InstVTs.push_back(MVT::Other);
1353 SDVTList InstVTList = CurDAG->getVTList(InstVTs);
1354
1355 if (SelectDirectAddr(Op1, Addr)) {
1356 switch (N->getOpcode()) {
1357 default:
1358 return false;
1359 case ISD::INTRINSIC_W_CHAIN:
1360 if (IsLDG) {
1361 switch (EltVT.getSimpleVT().SimpleTy) {
1362 default:
1363 return false;
1364 case MVT::i8:
1365 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
1366 break;
1367 case MVT::i16:
1368 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
1369 break;
1370 case MVT::i32:
1371 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
1372 break;
1373 case MVT::i64:
1374 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
1375 break;
1376 case MVT::f32:
1377 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
1378 break;
1379 case MVT::f64:
1380 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
1381 break;
1382 }
1383 } else {
1384 switch (EltVT.getSimpleVT().SimpleTy) {
1385 default:
1386 return false;
1387 case MVT::i8:
1388 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
1389 break;
1390 case MVT::i16:
1391 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
1392 break;
1393 case MVT::i32:
1394 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
1395 break;
1396 case MVT::i64:
1397 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
1398 break;
1399 case MVT::f32:
1400 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
1401 break;
1402 case MVT::f64:
1403 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
1404 break;
1405 }
1406 }
1407 break;
1408 case NVPTXISD::LDGV2:
1409 switch (EltVT.getSimpleVT().SimpleTy) {
1410 default:
1411 return false;
1412 case MVT::i8:
1413 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
1414 break;
1415 case MVT::i16:
1416 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
1417 break;
1418 case MVT::i32:
1419 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
1420 break;
1421 case MVT::i64:
1422 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
1423 break;
1424 case MVT::f32:
1425 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
1426 break;
1427 case MVT::f64:
1428 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
1429 break;
1430 }
1431 break;
1432 case NVPTXISD::LDUV2:
1433 switch (EltVT.getSimpleVT().SimpleTy) {
1434 default:
1435 return false;
1436 case MVT::i8:
1437 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
1438 break;
1439 case MVT::i16:
1440 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
1441 break;
1442 case MVT::i32:
1443 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
1444 break;
1445 case MVT::i64:
1446 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
1447 break;
1448 case MVT::f32:
1449 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
1450 break;
1451 case MVT::f64:
1452 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
1453 break;
1454 }
1455 break;
1456 case NVPTXISD::LDGV4:
1457 switch (EltVT.getSimpleVT().SimpleTy) {
1458 default:
1459 return false;
1460 case MVT::i8:
1461 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
1462 break;
1463 case MVT::i16:
1464 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
1465 break;
1466 case MVT::i32:
1467 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
1468 break;
1469 case MVT::f32:
1470 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
1471 break;
1472 }
1473 break;
1474 case NVPTXISD::LDUV4:
1475 switch (EltVT.getSimpleVT().SimpleTy) {
1476 default:
1477 return false;
1478 case MVT::i8:
1479 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
1480 break;
1481 case MVT::i16:
1482 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
1483 break;
1484 case MVT::i32:
1485 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
1486 break;
1487 case MVT::f32:
1488 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
1489 break;
1490 }
1491 break;
1492 }
1493
1494 SDValue Ops[] = { Addr, Chain };
1495 LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops);
1496 } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1497 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1498 if (TM.is64Bit()) {
1499 switch (N->getOpcode()) {
1500 default:
1501 return false;
1502 case ISD::LOAD:
1503 case ISD::INTRINSIC_W_CHAIN:
1504 if (IsLDG) {
1505 switch (EltVT.getSimpleVT().SimpleTy) {
1506 default:
1507 return false;
1508 case MVT::i8:
1509 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
1510 break;
1511 case MVT::i16:
1512 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
1513 break;
1514 case MVT::i32:
1515 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
1516 break;
1517 case MVT::i64:
1518 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
1519 break;
1520 case MVT::f32:
1521 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
1522 break;
1523 case MVT::f64:
1524 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
1525 break;
1526 }
1527 } else {
1528 switch (EltVT.getSimpleVT().SimpleTy) {
1529 default:
1530 return false;
1531 case MVT::i8:
1532 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
1533 break;
1534 case MVT::i16:
1535 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
1536 break;
1537 case MVT::i32:
1538 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
1539 break;
1540 case MVT::i64:
1541 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
1542 break;
1543 case MVT::f32:
1544 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
1545 break;
1546 case MVT::f64:
1547 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
1548 break;
1549 }
1550 }
1551 break;
1552 case NVPTXISD::LoadV2:
1553 case NVPTXISD::LDGV2:
1554 switch (EltVT.getSimpleVT().SimpleTy) {
1555 default:
1556 return false;
1557 case MVT::i8:
1558 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
1559 break;
1560 case MVT::i16:
1561 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
1562 break;
1563 case MVT::i32:
1564 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
1565 break;
1566 case MVT::i64:
1567 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
1568 break;
1569 case MVT::f32:
1570 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
1571 break;
1572 case MVT::f64:
1573 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
1574 break;
1575 }
1576 break;
1577 case NVPTXISD::LDUV2:
1578 switch (EltVT.getSimpleVT().SimpleTy) {
1579 default:
1580 return false;
1581 case MVT::i8:
1582 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
1583 break;
1584 case MVT::i16:
1585 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
1586 break;
1587 case MVT::i32:
1588 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
1589 break;
1590 case MVT::i64:
1591 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
1592 break;
1593 case MVT::f32:
1594 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
1595 break;
1596 case MVT::f64:
1597 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
1598 break;
1599 }
1600 break;
1601 case NVPTXISD::LoadV4:
1602 case NVPTXISD::LDGV4:
1603 switch (EltVT.getSimpleVT().SimpleTy) {
1604 default:
1605 return false;
1606 case MVT::i8:
1607 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
1608 break;
1609 case MVT::i16:
1610 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
1611 break;
1612 case MVT::i32:
1613 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
1614 break;
1615 case MVT::f32:
1616 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
1617 break;
1618 }
1619 break;
1620 case NVPTXISD::LDUV4:
1621 switch (EltVT.getSimpleVT().SimpleTy) {
1622 default:
1623 return false;
1624 case MVT::i8:
1625 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
1626 break;
1627 case MVT::i16:
1628 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
1629 break;
1630 case MVT::i32:
1631 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
1632 break;
1633 case MVT::f32:
1634 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
1635 break;
1636 }
1637 break;
1638 }
1639 } else {
1640 switch (N->getOpcode()) {
1641 default:
1642 return false;
1643 case ISD::LOAD:
1644 case ISD::INTRINSIC_W_CHAIN:
1645 if (IsLDG) {
1646 switch (EltVT.getSimpleVT().SimpleTy) {
1647 default:
1648 return false;
1649 case MVT::i8:
1650 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
1651 break;
1652 case MVT::i16:
1653 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
1654 break;
1655 case MVT::i32:
1656 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
1657 break;
1658 case MVT::i64:
1659 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
1660 break;
1661 case MVT::f32:
1662 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
1663 break;
1664 case MVT::f64:
1665 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
1666 break;
1667 }
1668 } else {
1669 switch (EltVT.getSimpleVT().SimpleTy) {
1670 default:
1671 return false;
1672 case MVT::i8:
1673 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
1674 break;
1675 case MVT::i16:
1676 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
1677 break;
1678 case MVT::i32:
1679 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
1680 break;
1681 case MVT::i64:
1682 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
1683 break;
1684 case MVT::f32:
1685 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
1686 break;
1687 case MVT::f64:
1688 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
1689 break;
1690 }
1691 }
1692 break;
1693 case NVPTXISD::LoadV2:
1694 case NVPTXISD::LDGV2:
1695 switch (EltVT.getSimpleVT().SimpleTy) {
1696 default:
1697 return false;
1698 case MVT::i8:
1699 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
1700 break;
1701 case MVT::i16:
1702 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
1703 break;
1704 case MVT::i32:
1705 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
1706 break;
1707 case MVT::i64:
1708 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
1709 break;
1710 case MVT::f32:
1711 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
1712 break;
1713 case MVT::f64:
1714 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
1715 break;
1716 }
1717 break;
1718 case NVPTXISD::LDUV2:
1719 switch (EltVT.getSimpleVT().SimpleTy) {
1720 default:
1721 return false;
1722 case MVT::i8:
1723 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
1724 break;
1725 case MVT::i16:
1726 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
1727 break;
1728 case MVT::i32:
1729 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
1730 break;
1731 case MVT::i64:
1732 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
1733 break;
1734 case MVT::f32:
1735 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
1736 break;
1737 case MVT::f64:
1738 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
1739 break;
1740 }
1741 break;
1742 case NVPTXISD::LoadV4:
1743 case NVPTXISD::LDGV4:
1744 switch (EltVT.getSimpleVT().SimpleTy) {
1745 default:
1746 return false;
1747 case MVT::i8:
1748 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
1749 break;
1750 case MVT::i16:
1751 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
1752 break;
1753 case MVT::i32:
1754 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
1755 break;
1756 case MVT::f32:
1757 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
1758 break;
1759 }
1760 break;
1761 case NVPTXISD::LDUV4:
1762 switch (EltVT.getSimpleVT().SimpleTy) {
1763 default:
1764 return false;
1765 case MVT::i8:
1766 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
1767 break;
1768 case MVT::i16:
1769 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
1770 break;
1771 case MVT::i32:
1772 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
1773 break;
1774 case MVT::f32:
1775 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
1776 break;
1777 }
1778 break;
1779 }
1780 }
1781
1782 SDValue Ops[] = { Base, Offset, Chain };
1783
1784 LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops);
1785 } else {
1786 if (TM.is64Bit()) {
1787 switch (N->getOpcode()) {
1788 default:
1789 return false;
1790 case ISD::LOAD:
1791 case ISD::INTRINSIC_W_CHAIN:
1792 if (IsLDG) {
1793 switch (EltVT.getSimpleVT().SimpleTy) {
1794 default:
1795 return false;
1796 case MVT::i8:
1797 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
1798 break;
1799 case MVT::i16:
1800 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
1801 break;
1802 case MVT::i32:
1803 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
1804 break;
1805 case MVT::i64:
1806 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
1807 break;
1808 case MVT::f32:
1809 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
1810 break;
1811 case MVT::f64:
1812 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
1813 break;
1814 }
1815 } else {
1816 switch (EltVT.getSimpleVT().SimpleTy) {
1817 default:
1818 return false;
1819 case MVT::i8:
1820 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
1821 break;
1822 case MVT::i16:
1823 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
1824 break;
1825 case MVT::i32:
1826 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
1827 break;
1828 case MVT::i64:
1829 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
1830 break;
1831 case MVT::f32:
1832 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
1833 break;
1834 case MVT::f64:
1835 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
1836 break;
1837 }
1838 }
1839 break;
1840 case NVPTXISD::LoadV2:
1841 case NVPTXISD::LDGV2:
1842 switch (EltVT.getSimpleVT().SimpleTy) {
1843 default:
1844 return false;
1845 case MVT::i8:
1846 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
1847 break;
1848 case MVT::i16:
1849 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
1850 break;
1851 case MVT::i32:
1852 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
1853 break;
1854 case MVT::i64:
1855 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
1856 break;
1857 case MVT::f32:
1858 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
1859 break;
1860 case MVT::f64:
1861 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
1862 break;
1863 }
1864 break;
1865 case NVPTXISD::LDUV2:
1866 switch (EltVT.getSimpleVT().SimpleTy) {
1867 default:
1868 return false;
1869 case MVT::i8:
1870 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
1871 break;
1872 case MVT::i16:
1873 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
1874 break;
1875 case MVT::i32:
1876 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
1877 break;
1878 case MVT::i64:
1879 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
1880 break;
1881 case MVT::f32:
1882 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
1883 break;
1884 case MVT::f64:
1885 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
1886 break;
1887 }
1888 break;
1889 case NVPTXISD::LoadV4:
1890 case NVPTXISD::LDGV4:
1891 switch (EltVT.getSimpleVT().SimpleTy) {
1892 default:
1893 return false;
1894 case MVT::i8:
1895 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
1896 break;
1897 case MVT::i16:
1898 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
1899 break;
1900 case MVT::i32:
1901 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
1902 break;
1903 case MVT::f32:
1904 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
1905 break;
1906 }
1907 break;
1908 case NVPTXISD::LDUV4:
1909 switch (EltVT.getSimpleVT().SimpleTy) {
1910 default:
1911 return false;
1912 case MVT::i8:
1913 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
1914 break;
1915 case MVT::i16:
1916 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
1917 break;
1918 case MVT::i32:
1919 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
1920 break;
1921 case MVT::f32:
1922 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
1923 break;
1924 }
1925 break;
1926 }
1927 } else {
1928 switch (N->getOpcode()) {
1929 default:
1930 return false;
1931 case ISD::LOAD:
1932 case ISD::INTRINSIC_W_CHAIN:
1933 if (IsLDG) {
1934 switch (EltVT.getSimpleVT().SimpleTy) {
1935 default:
1936 return false;
1937 case MVT::i8:
1938 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
1939 break;
1940 case MVT::i16:
1941 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
1942 break;
1943 case MVT::i32:
1944 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
1945 break;
1946 case MVT::i64:
1947 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
1948 break;
1949 case MVT::f32:
1950 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
1951 break;
1952 case MVT::f64:
1953 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
1954 break;
1955 }
1956 } else {
1957 switch (EltVT.getSimpleVT().SimpleTy) {
1958 default:
1959 return false;
1960 case MVT::i8:
1961 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
1962 break;
1963 case MVT::i16:
1964 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
1965 break;
1966 case MVT::i32:
1967 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
1968 break;
1969 case MVT::i64:
1970 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
1971 break;
1972 case MVT::f32:
1973 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
1974 break;
1975 case MVT::f64:
1976 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
1977 break;
1978 }
1979 }
1980 break;
1981 case NVPTXISD::LoadV2:
1982 case NVPTXISD::LDGV2:
1983 switch (EltVT.getSimpleVT().SimpleTy) {
1984 default:
1985 return false;
1986 case MVT::i8:
1987 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
1988 break;
1989 case MVT::i16:
1990 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
1991 break;
1992 case MVT::i32:
1993 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
1994 break;
1995 case MVT::i64:
1996 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
1997 break;
1998 case MVT::f32:
1999 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
2000 break;
2001 case MVT::f64:
2002 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
2003 break;
2004 }
2005 break;
2006 case NVPTXISD::LDUV2:
2007 switch (EltVT.getSimpleVT().SimpleTy) {
2008 default:
2009 return false;
2010 case MVT::i8:
2011 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
2012 break;
2013 case MVT::i16:
2014 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
2015 break;
2016 case MVT::i32:
2017 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
2018 break;
2019 case MVT::i64:
2020 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
2021 break;
2022 case MVT::f32:
2023 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
2024 break;
2025 case MVT::f64:
2026 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
2027 break;
2028 }
2029 break;
2030 case NVPTXISD::LoadV4:
2031 case NVPTXISD::LDGV4:
2032 switch (EltVT.getSimpleVT().SimpleTy) {
2033 default:
2034 return false;
2035 case MVT::i8:
2036 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
2037 break;
2038 case MVT::i16:
2039 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
2040 break;
2041 case MVT::i32:
2042 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
2043 break;
2044 case MVT::f32:
2045 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
2046 break;
2047 }
2048 break;
2049 case NVPTXISD::LDUV4:
2050 switch (EltVT.getSimpleVT().SimpleTy) {
2051 default:
2052 return false;
2053 case MVT::i8:
2054 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
2055 break;
2056 case MVT::i16:
2057 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
2058 break;
2059 case MVT::i32:
2060 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
2061 break;
2062 case MVT::f32:
2063 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
2064 break;
2065 }
2066 break;
2067 }
2068 }
2069
2070 SDValue Ops[] = { Op1, Chain };
2071 LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops);
2072 }
2073
2074 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2075 MemRefs0[0] = Mem->getMemOperand();
2076 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
2077
2078 // For automatic generation of LDG (through SelectLoad[Vector], not the
2079 // intrinsics), we may have an extending load like:
2080 //
2081 // i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64
2082 //
2083 // In this case, the matching logic above will select a load for the original
2084 // memory type (in this case, i8) and our types will not match (the node needs
2085 // to return an i32 in this case). Our LDG/LDU nodes do not support the
2086 // concept of sign-/zero-extension, so emulate it here by adding an explicit
2087 // CVT instruction. Ptxas should clean up any redundancies here.
2088
2089 EVT OrigType = N->getValueType(0);
2090 LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N);
2091
2092 if (OrigType != EltVT && LdNode) {
2093 // We have an extending-load. The instruction we selected operates on the
2094 // smaller type, but the SDNode we are replacing has the larger type. We
2095 // need to emit a CVT to make the types match.
2096 bool IsSigned = LdNode->getExtensionType() == ISD::SEXTLOAD;
2097 unsigned CvtOpc = GetConvertOpcode(OrigType.getSimpleVT(),
2098 EltVT.getSimpleVT(), IsSigned);
2099
2100 // For each output value, apply the manual sign/zero-extension and make sure
2101 // all users of the load go through that CVT.
2102 for (unsigned i = 0; i != NumElts; ++i) {
2103 SDValue Res(LD, i);
2104 SDValue OrigVal(N, i);
2105
2106 SDNode *CvtNode =
2107 CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res,
2108 CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2109 DL, MVT::i32));
2110 ReplaceUses(OrigVal, SDValue(CvtNode, 0));
2111 }
2112 }
2113
2114 ReplaceNode(N, LD);
2115 return true;
2116 }
2117
tryStore(SDNode * N)2118 bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
2119 SDLoc dl(N);
2120 StoreSDNode *ST = cast<StoreSDNode>(N);
2121 EVT StoreVT = ST->getMemoryVT();
2122 SDNode *NVPTXST = nullptr;
2123
2124 // do not support pre/post inc/dec
2125 if (ST->isIndexed())
2126 return false;
2127
2128 if (!StoreVT.isSimple())
2129 return false;
2130
2131 // Address Space Setting
2132 unsigned int codeAddrSpace = getCodeAddrSpace(ST);
2133
2134 // Volatile Setting
2135 // - .volatile is only availalble for .global and .shared
2136 bool isVolatile = ST->isVolatile();
2137 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2138 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2139 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2140 isVolatile = false;
2141
2142 // Vector Setting
2143 MVT SimpleVT = StoreVT.getSimpleVT();
2144 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
2145 if (SimpleVT.isVector()) {
2146 unsigned num = SimpleVT.getVectorNumElements();
2147 if (num == 2)
2148 vecType = NVPTX::PTXLdStInstCode::V2;
2149 else if (num == 4)
2150 vecType = NVPTX::PTXLdStInstCode::V4;
2151 else
2152 return false;
2153 }
2154
2155 // Type Setting: toType + toTypeWidth
2156 // - for integer type, always use 'u'
2157 //
2158 MVT ScalarVT = SimpleVT.getScalarType();
2159 unsigned toTypeWidth = ScalarVT.getSizeInBits();
2160 unsigned int toType;
2161 if (ScalarVT.isFloatingPoint())
2162 toType = NVPTX::PTXLdStInstCode::Float;
2163 else
2164 toType = NVPTX::PTXLdStInstCode::Unsigned;
2165
2166 // Create the machine instruction DAG
2167 SDValue Chain = N->getOperand(0);
2168 SDValue N1 = N->getOperand(1);
2169 SDValue N2 = N->getOperand(2);
2170 SDValue Addr;
2171 SDValue Offset, Base;
2172 unsigned Opcode;
2173 MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
2174
2175 if (SelectDirectAddr(N2, Addr)) {
2176 switch (SourceVT) {
2177 case MVT::i8:
2178 Opcode = NVPTX::ST_i8_avar;
2179 break;
2180 case MVT::i16:
2181 Opcode = NVPTX::ST_i16_avar;
2182 break;
2183 case MVT::i32:
2184 Opcode = NVPTX::ST_i32_avar;
2185 break;
2186 case MVT::i64:
2187 Opcode = NVPTX::ST_i64_avar;
2188 break;
2189 case MVT::f32:
2190 Opcode = NVPTX::ST_f32_avar;
2191 break;
2192 case MVT::f64:
2193 Opcode = NVPTX::ST_f64_avar;
2194 break;
2195 default:
2196 return false;
2197 }
2198 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2199 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2200 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr,
2201 Chain };
2202 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2203 } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2204 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
2205 switch (SourceVT) {
2206 case MVT::i8:
2207 Opcode = NVPTX::ST_i8_asi;
2208 break;
2209 case MVT::i16:
2210 Opcode = NVPTX::ST_i16_asi;
2211 break;
2212 case MVT::i32:
2213 Opcode = NVPTX::ST_i32_asi;
2214 break;
2215 case MVT::i64:
2216 Opcode = NVPTX::ST_i64_asi;
2217 break;
2218 case MVT::f32:
2219 Opcode = NVPTX::ST_f32_asi;
2220 break;
2221 case MVT::f64:
2222 Opcode = NVPTX::ST_f64_asi;
2223 break;
2224 default:
2225 return false;
2226 }
2227 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2228 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2229 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
2230 Offset, Chain };
2231 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2232 } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2233 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2234 if (TM.is64Bit()) {
2235 switch (SourceVT) {
2236 case MVT::i8:
2237 Opcode = NVPTX::ST_i8_ari_64;
2238 break;
2239 case MVT::i16:
2240 Opcode = NVPTX::ST_i16_ari_64;
2241 break;
2242 case MVT::i32:
2243 Opcode = NVPTX::ST_i32_ari_64;
2244 break;
2245 case MVT::i64:
2246 Opcode = NVPTX::ST_i64_ari_64;
2247 break;
2248 case MVT::f32:
2249 Opcode = NVPTX::ST_f32_ari_64;
2250 break;
2251 case MVT::f64:
2252 Opcode = NVPTX::ST_f64_ari_64;
2253 break;
2254 default:
2255 return false;
2256 }
2257 } else {
2258 switch (SourceVT) {
2259 case MVT::i8:
2260 Opcode = NVPTX::ST_i8_ari;
2261 break;
2262 case MVT::i16:
2263 Opcode = NVPTX::ST_i16_ari;
2264 break;
2265 case MVT::i32:
2266 Opcode = NVPTX::ST_i32_ari;
2267 break;
2268 case MVT::i64:
2269 Opcode = NVPTX::ST_i64_ari;
2270 break;
2271 case MVT::f32:
2272 Opcode = NVPTX::ST_f32_ari;
2273 break;
2274 case MVT::f64:
2275 Opcode = NVPTX::ST_f64_ari;
2276 break;
2277 default:
2278 return false;
2279 }
2280 }
2281 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2282 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2283 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
2284 Offset, Chain };
2285 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2286 } else {
2287 if (TM.is64Bit()) {
2288 switch (SourceVT) {
2289 case MVT::i8:
2290 Opcode = NVPTX::ST_i8_areg_64;
2291 break;
2292 case MVT::i16:
2293 Opcode = NVPTX::ST_i16_areg_64;
2294 break;
2295 case MVT::i32:
2296 Opcode = NVPTX::ST_i32_areg_64;
2297 break;
2298 case MVT::i64:
2299 Opcode = NVPTX::ST_i64_areg_64;
2300 break;
2301 case MVT::f32:
2302 Opcode = NVPTX::ST_f32_areg_64;
2303 break;
2304 case MVT::f64:
2305 Opcode = NVPTX::ST_f64_areg_64;
2306 break;
2307 default:
2308 return false;
2309 }
2310 } else {
2311 switch (SourceVT) {
2312 case MVT::i8:
2313 Opcode = NVPTX::ST_i8_areg;
2314 break;
2315 case MVT::i16:
2316 Opcode = NVPTX::ST_i16_areg;
2317 break;
2318 case MVT::i32:
2319 Opcode = NVPTX::ST_i32_areg;
2320 break;
2321 case MVT::i64:
2322 Opcode = NVPTX::ST_i64_areg;
2323 break;
2324 case MVT::f32:
2325 Opcode = NVPTX::ST_f32_areg;
2326 break;
2327 case MVT::f64:
2328 Opcode = NVPTX::ST_f64_areg;
2329 break;
2330 default:
2331 return false;
2332 }
2333 }
2334 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2335 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2336 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2,
2337 Chain };
2338 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
2339 }
2340
2341 if (!NVPTXST)
2342 return false;
2343
2344 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2345 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2346 cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
2347 ReplaceNode(N, NVPTXST);
2348 return true;
2349 }
2350
tryStoreVector(SDNode * N)2351 bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
2352 SDValue Chain = N->getOperand(0);
2353 SDValue Op1 = N->getOperand(1);
2354 SDValue Addr, Offset, Base;
2355 unsigned Opcode;
2356 SDLoc DL(N);
2357 SDNode *ST;
2358 EVT EltVT = Op1.getValueType();
2359 MemSDNode *MemSD = cast<MemSDNode>(N);
2360 EVT StoreVT = MemSD->getMemoryVT();
2361
2362 // Address Space Setting
2363 unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
2364
2365 if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
2366 report_fatal_error("Cannot store to pointer that points to constant "
2367 "memory space");
2368 }
2369
2370 // Volatile Setting
2371 // - .volatile is only availalble for .global and .shared
2372 bool IsVolatile = MemSD->isVolatile();
2373 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2374 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2375 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2376 IsVolatile = false;
2377
2378 // Type Setting: toType + toTypeWidth
2379 // - for integer type, always use 'u'
2380 assert(StoreVT.isSimple() && "Store value is not simple");
2381 MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
2382 unsigned ToTypeWidth = ScalarVT.getSizeInBits();
2383 unsigned ToType;
2384 if (ScalarVT.isFloatingPoint())
2385 ToType = NVPTX::PTXLdStInstCode::Float;
2386 else
2387 ToType = NVPTX::PTXLdStInstCode::Unsigned;
2388
2389 SmallVector<SDValue, 12> StOps;
2390 SDValue N2;
2391 unsigned VecType;
2392
2393 switch (N->getOpcode()) {
2394 case NVPTXISD::StoreV2:
2395 VecType = NVPTX::PTXLdStInstCode::V2;
2396 StOps.push_back(N->getOperand(1));
2397 StOps.push_back(N->getOperand(2));
2398 N2 = N->getOperand(3);
2399 break;
2400 case NVPTXISD::StoreV4:
2401 VecType = NVPTX::PTXLdStInstCode::V4;
2402 StOps.push_back(N->getOperand(1));
2403 StOps.push_back(N->getOperand(2));
2404 StOps.push_back(N->getOperand(3));
2405 StOps.push_back(N->getOperand(4));
2406 N2 = N->getOperand(5);
2407 break;
2408 default:
2409 return false;
2410 }
2411
2412 StOps.push_back(getI32Imm(IsVolatile, DL));
2413 StOps.push_back(getI32Imm(CodeAddrSpace, DL));
2414 StOps.push_back(getI32Imm(VecType, DL));
2415 StOps.push_back(getI32Imm(ToType, DL));
2416 StOps.push_back(getI32Imm(ToTypeWidth, DL));
2417
2418 if (SelectDirectAddr(N2, Addr)) {
2419 switch (N->getOpcode()) {
2420 default:
2421 return false;
2422 case NVPTXISD::StoreV2:
2423 switch (EltVT.getSimpleVT().SimpleTy) {
2424 default:
2425 return false;
2426 case MVT::i8:
2427 Opcode = NVPTX::STV_i8_v2_avar;
2428 break;
2429 case MVT::i16:
2430 Opcode = NVPTX::STV_i16_v2_avar;
2431 break;
2432 case MVT::i32:
2433 Opcode = NVPTX::STV_i32_v2_avar;
2434 break;
2435 case MVT::i64:
2436 Opcode = NVPTX::STV_i64_v2_avar;
2437 break;
2438 case MVT::f32:
2439 Opcode = NVPTX::STV_f32_v2_avar;
2440 break;
2441 case MVT::f64:
2442 Opcode = NVPTX::STV_f64_v2_avar;
2443 break;
2444 }
2445 break;
2446 case NVPTXISD::StoreV4:
2447 switch (EltVT.getSimpleVT().SimpleTy) {
2448 default:
2449 return false;
2450 case MVT::i8:
2451 Opcode = NVPTX::STV_i8_v4_avar;
2452 break;
2453 case MVT::i16:
2454 Opcode = NVPTX::STV_i16_v4_avar;
2455 break;
2456 case MVT::i32:
2457 Opcode = NVPTX::STV_i32_v4_avar;
2458 break;
2459 case MVT::f32:
2460 Opcode = NVPTX::STV_f32_v4_avar;
2461 break;
2462 }
2463 break;
2464 }
2465 StOps.push_back(Addr);
2466 } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2467 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
2468 switch (N->getOpcode()) {
2469 default:
2470 return false;
2471 case NVPTXISD::StoreV2:
2472 switch (EltVT.getSimpleVT().SimpleTy) {
2473 default:
2474 return false;
2475 case MVT::i8:
2476 Opcode = NVPTX::STV_i8_v2_asi;
2477 break;
2478 case MVT::i16:
2479 Opcode = NVPTX::STV_i16_v2_asi;
2480 break;
2481 case MVT::i32:
2482 Opcode = NVPTX::STV_i32_v2_asi;
2483 break;
2484 case MVT::i64:
2485 Opcode = NVPTX::STV_i64_v2_asi;
2486 break;
2487 case MVT::f32:
2488 Opcode = NVPTX::STV_f32_v2_asi;
2489 break;
2490 case MVT::f64:
2491 Opcode = NVPTX::STV_f64_v2_asi;
2492 break;
2493 }
2494 break;
2495 case NVPTXISD::StoreV4:
2496 switch (EltVT.getSimpleVT().SimpleTy) {
2497 default:
2498 return false;
2499 case MVT::i8:
2500 Opcode = NVPTX::STV_i8_v4_asi;
2501 break;
2502 case MVT::i16:
2503 Opcode = NVPTX::STV_i16_v4_asi;
2504 break;
2505 case MVT::i32:
2506 Opcode = NVPTX::STV_i32_v4_asi;
2507 break;
2508 case MVT::f32:
2509 Opcode = NVPTX::STV_f32_v4_asi;
2510 break;
2511 }
2512 break;
2513 }
2514 StOps.push_back(Base);
2515 StOps.push_back(Offset);
2516 } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2517 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2518 if (TM.is64Bit()) {
2519 switch (N->getOpcode()) {
2520 default:
2521 return false;
2522 case NVPTXISD::StoreV2:
2523 switch (EltVT.getSimpleVT().SimpleTy) {
2524 default:
2525 return false;
2526 case MVT::i8:
2527 Opcode = NVPTX::STV_i8_v2_ari_64;
2528 break;
2529 case MVT::i16:
2530 Opcode = NVPTX::STV_i16_v2_ari_64;
2531 break;
2532 case MVT::i32:
2533 Opcode = NVPTX::STV_i32_v2_ari_64;
2534 break;
2535 case MVT::i64:
2536 Opcode = NVPTX::STV_i64_v2_ari_64;
2537 break;
2538 case MVT::f32:
2539 Opcode = NVPTX::STV_f32_v2_ari_64;
2540 break;
2541 case MVT::f64:
2542 Opcode = NVPTX::STV_f64_v2_ari_64;
2543 break;
2544 }
2545 break;
2546 case NVPTXISD::StoreV4:
2547 switch (EltVT.getSimpleVT().SimpleTy) {
2548 default:
2549 return false;
2550 case MVT::i8:
2551 Opcode = NVPTX::STV_i8_v4_ari_64;
2552 break;
2553 case MVT::i16:
2554 Opcode = NVPTX::STV_i16_v4_ari_64;
2555 break;
2556 case MVT::i32:
2557 Opcode = NVPTX::STV_i32_v4_ari_64;
2558 break;
2559 case MVT::f32:
2560 Opcode = NVPTX::STV_f32_v4_ari_64;
2561 break;
2562 }
2563 break;
2564 }
2565 } else {
2566 switch (N->getOpcode()) {
2567 default:
2568 return false;
2569 case NVPTXISD::StoreV2:
2570 switch (EltVT.getSimpleVT().SimpleTy) {
2571 default:
2572 return false;
2573 case MVT::i8:
2574 Opcode = NVPTX::STV_i8_v2_ari;
2575 break;
2576 case MVT::i16:
2577 Opcode = NVPTX::STV_i16_v2_ari;
2578 break;
2579 case MVT::i32:
2580 Opcode = NVPTX::STV_i32_v2_ari;
2581 break;
2582 case MVT::i64:
2583 Opcode = NVPTX::STV_i64_v2_ari;
2584 break;
2585 case MVT::f32:
2586 Opcode = NVPTX::STV_f32_v2_ari;
2587 break;
2588 case MVT::f64:
2589 Opcode = NVPTX::STV_f64_v2_ari;
2590 break;
2591 }
2592 break;
2593 case NVPTXISD::StoreV4:
2594 switch (EltVT.getSimpleVT().SimpleTy) {
2595 default:
2596 return false;
2597 case MVT::i8:
2598 Opcode = NVPTX::STV_i8_v4_ari;
2599 break;
2600 case MVT::i16:
2601 Opcode = NVPTX::STV_i16_v4_ari;
2602 break;
2603 case MVT::i32:
2604 Opcode = NVPTX::STV_i32_v4_ari;
2605 break;
2606 case MVT::f32:
2607 Opcode = NVPTX::STV_f32_v4_ari;
2608 break;
2609 }
2610 break;
2611 }
2612 }
2613 StOps.push_back(Base);
2614 StOps.push_back(Offset);
2615 } else {
2616 if (TM.is64Bit()) {
2617 switch (N->getOpcode()) {
2618 default:
2619 return false;
2620 case NVPTXISD::StoreV2:
2621 switch (EltVT.getSimpleVT().SimpleTy) {
2622 default:
2623 return false;
2624 case MVT::i8:
2625 Opcode = NVPTX::STV_i8_v2_areg_64;
2626 break;
2627 case MVT::i16:
2628 Opcode = NVPTX::STV_i16_v2_areg_64;
2629 break;
2630 case MVT::i32:
2631 Opcode = NVPTX::STV_i32_v2_areg_64;
2632 break;
2633 case MVT::i64:
2634 Opcode = NVPTX::STV_i64_v2_areg_64;
2635 break;
2636 case MVT::f32:
2637 Opcode = NVPTX::STV_f32_v2_areg_64;
2638 break;
2639 case MVT::f64:
2640 Opcode = NVPTX::STV_f64_v2_areg_64;
2641 break;
2642 }
2643 break;
2644 case NVPTXISD::StoreV4:
2645 switch (EltVT.getSimpleVT().SimpleTy) {
2646 default:
2647 return false;
2648 case MVT::i8:
2649 Opcode = NVPTX::STV_i8_v4_areg_64;
2650 break;
2651 case MVT::i16:
2652 Opcode = NVPTX::STV_i16_v4_areg_64;
2653 break;
2654 case MVT::i32:
2655 Opcode = NVPTX::STV_i32_v4_areg_64;
2656 break;
2657 case MVT::f32:
2658 Opcode = NVPTX::STV_f32_v4_areg_64;
2659 break;
2660 }
2661 break;
2662 }
2663 } else {
2664 switch (N->getOpcode()) {
2665 default:
2666 return false;
2667 case NVPTXISD::StoreV2:
2668 switch (EltVT.getSimpleVT().SimpleTy) {
2669 default:
2670 return false;
2671 case MVT::i8:
2672 Opcode = NVPTX::STV_i8_v2_areg;
2673 break;
2674 case MVT::i16:
2675 Opcode = NVPTX::STV_i16_v2_areg;
2676 break;
2677 case MVT::i32:
2678 Opcode = NVPTX::STV_i32_v2_areg;
2679 break;
2680 case MVT::i64:
2681 Opcode = NVPTX::STV_i64_v2_areg;
2682 break;
2683 case MVT::f32:
2684 Opcode = NVPTX::STV_f32_v2_areg;
2685 break;
2686 case MVT::f64:
2687 Opcode = NVPTX::STV_f64_v2_areg;
2688 break;
2689 }
2690 break;
2691 case NVPTXISD::StoreV4:
2692 switch (EltVT.getSimpleVT().SimpleTy) {
2693 default:
2694 return false;
2695 case MVT::i8:
2696 Opcode = NVPTX::STV_i8_v4_areg;
2697 break;
2698 case MVT::i16:
2699 Opcode = NVPTX::STV_i16_v4_areg;
2700 break;
2701 case MVT::i32:
2702 Opcode = NVPTX::STV_i32_v4_areg;
2703 break;
2704 case MVT::f32:
2705 Opcode = NVPTX::STV_f32_v4_areg;
2706 break;
2707 }
2708 break;
2709 }
2710 }
2711 StOps.push_back(N2);
2712 }
2713
2714 StOps.push_back(Chain);
2715
2716 ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
2717
2718 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2719 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2720 cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
2721
2722 ReplaceNode(N, ST);
2723 return true;
2724 }
2725
tryLoadParam(SDNode * Node)2726 bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
2727 SDValue Chain = Node->getOperand(0);
2728 SDValue Offset = Node->getOperand(2);
2729 SDValue Flag = Node->getOperand(3);
2730 SDLoc DL(Node);
2731 MemSDNode *Mem = cast<MemSDNode>(Node);
2732
2733 unsigned VecSize;
2734 switch (Node->getOpcode()) {
2735 default:
2736 return false;
2737 case NVPTXISD::LoadParam:
2738 VecSize = 1;
2739 break;
2740 case NVPTXISD::LoadParamV2:
2741 VecSize = 2;
2742 break;
2743 case NVPTXISD::LoadParamV4:
2744 VecSize = 4;
2745 break;
2746 }
2747
2748 EVT EltVT = Node->getValueType(0);
2749 EVT MemVT = Mem->getMemoryVT();
2750
2751 unsigned Opc = 0;
2752
2753 switch (VecSize) {
2754 default:
2755 return false;
2756 case 1:
2757 switch (MemVT.getSimpleVT().SimpleTy) {
2758 default:
2759 return false;
2760 case MVT::i1:
2761 Opc = NVPTX::LoadParamMemI8;
2762 break;
2763 case MVT::i8:
2764 Opc = NVPTX::LoadParamMemI8;
2765 break;
2766 case MVT::i16:
2767 Opc = NVPTX::LoadParamMemI16;
2768 break;
2769 case MVT::i32:
2770 Opc = NVPTX::LoadParamMemI32;
2771 break;
2772 case MVT::i64:
2773 Opc = NVPTX::LoadParamMemI64;
2774 break;
2775 case MVT::f32:
2776 Opc = NVPTX::LoadParamMemF32;
2777 break;
2778 case MVT::f64:
2779 Opc = NVPTX::LoadParamMemF64;
2780 break;
2781 }
2782 break;
2783 case 2:
2784 switch (MemVT.getSimpleVT().SimpleTy) {
2785 default:
2786 return false;
2787 case MVT::i1:
2788 Opc = NVPTX::LoadParamMemV2I8;
2789 break;
2790 case MVT::i8:
2791 Opc = NVPTX::LoadParamMemV2I8;
2792 break;
2793 case MVT::i16:
2794 Opc = NVPTX::LoadParamMemV2I16;
2795 break;
2796 case MVT::i32:
2797 Opc = NVPTX::LoadParamMemV2I32;
2798 break;
2799 case MVT::i64:
2800 Opc = NVPTX::LoadParamMemV2I64;
2801 break;
2802 case MVT::f32:
2803 Opc = NVPTX::LoadParamMemV2F32;
2804 break;
2805 case MVT::f64:
2806 Opc = NVPTX::LoadParamMemV2F64;
2807 break;
2808 }
2809 break;
2810 case 4:
2811 switch (MemVT.getSimpleVT().SimpleTy) {
2812 default:
2813 return false;
2814 case MVT::i1:
2815 Opc = NVPTX::LoadParamMemV4I8;
2816 break;
2817 case MVT::i8:
2818 Opc = NVPTX::LoadParamMemV4I8;
2819 break;
2820 case MVT::i16:
2821 Opc = NVPTX::LoadParamMemV4I16;
2822 break;
2823 case MVT::i32:
2824 Opc = NVPTX::LoadParamMemV4I32;
2825 break;
2826 case MVT::f32:
2827 Opc = NVPTX::LoadParamMemV4F32;
2828 break;
2829 }
2830 break;
2831 }
2832
2833 SDVTList VTs;
2834 if (VecSize == 1) {
2835 VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2836 } else if (VecSize == 2) {
2837 VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2838 } else {
2839 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2840 VTs = CurDAG->getVTList(EVTs);
2841 }
2842
2843 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2844
2845 SmallVector<SDValue, 2> Ops;
2846 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2847 Ops.push_back(Chain);
2848 Ops.push_back(Flag);
2849
2850 ReplaceNode(Node, CurDAG->getMachineNode(Opc, DL, VTs, Ops));
2851 return true;
2852 }
2853
tryStoreRetval(SDNode * N)2854 bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
2855 SDLoc DL(N);
2856 SDValue Chain = N->getOperand(0);
2857 SDValue Offset = N->getOperand(1);
2858 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2859 MemSDNode *Mem = cast<MemSDNode>(N);
2860
2861 // How many elements do we have?
2862 unsigned NumElts = 1;
2863 switch (N->getOpcode()) {
2864 default:
2865 return false;
2866 case NVPTXISD::StoreRetval:
2867 NumElts = 1;
2868 break;
2869 case NVPTXISD::StoreRetvalV2:
2870 NumElts = 2;
2871 break;
2872 case NVPTXISD::StoreRetvalV4:
2873 NumElts = 4;
2874 break;
2875 }
2876
2877 // Build vector of operands
2878 SmallVector<SDValue, 6> Ops;
2879 for (unsigned i = 0; i < NumElts; ++i)
2880 Ops.push_back(N->getOperand(i + 2));
2881 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
2882 Ops.push_back(Chain);
2883
2884 // Determine target opcode
2885 // If we have an i1, use an 8-bit store. The lowering code in
2886 // NVPTXISelLowering will have already emitted an upcast.
2887 unsigned Opcode = 0;
2888 switch (NumElts) {
2889 default:
2890 return false;
2891 case 1:
2892 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2893 default:
2894 return false;
2895 case MVT::i1:
2896 Opcode = NVPTX::StoreRetvalI8;
2897 break;
2898 case MVT::i8:
2899 Opcode = NVPTX::StoreRetvalI8;
2900 break;
2901 case MVT::i16:
2902 Opcode = NVPTX::StoreRetvalI16;
2903 break;
2904 case MVT::i32:
2905 Opcode = NVPTX::StoreRetvalI32;
2906 break;
2907 case MVT::i64:
2908 Opcode = NVPTX::StoreRetvalI64;
2909 break;
2910 case MVT::f32:
2911 Opcode = NVPTX::StoreRetvalF32;
2912 break;
2913 case MVT::f64:
2914 Opcode = NVPTX::StoreRetvalF64;
2915 break;
2916 }
2917 break;
2918 case 2:
2919 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2920 default:
2921 return false;
2922 case MVT::i1:
2923 Opcode = NVPTX::StoreRetvalV2I8;
2924 break;
2925 case MVT::i8:
2926 Opcode = NVPTX::StoreRetvalV2I8;
2927 break;
2928 case MVT::i16:
2929 Opcode = NVPTX::StoreRetvalV2I16;
2930 break;
2931 case MVT::i32:
2932 Opcode = NVPTX::StoreRetvalV2I32;
2933 break;
2934 case MVT::i64:
2935 Opcode = NVPTX::StoreRetvalV2I64;
2936 break;
2937 case MVT::f32:
2938 Opcode = NVPTX::StoreRetvalV2F32;
2939 break;
2940 case MVT::f64:
2941 Opcode = NVPTX::StoreRetvalV2F64;
2942 break;
2943 }
2944 break;
2945 case 4:
2946 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2947 default:
2948 return false;
2949 case MVT::i1:
2950 Opcode = NVPTX::StoreRetvalV4I8;
2951 break;
2952 case MVT::i8:
2953 Opcode = NVPTX::StoreRetvalV4I8;
2954 break;
2955 case MVT::i16:
2956 Opcode = NVPTX::StoreRetvalV4I16;
2957 break;
2958 case MVT::i32:
2959 Opcode = NVPTX::StoreRetvalV4I32;
2960 break;
2961 case MVT::f32:
2962 Opcode = NVPTX::StoreRetvalV4F32;
2963 break;
2964 }
2965 break;
2966 }
2967
2968 SDNode *Ret =
2969 CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
2970 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2971 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2972 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2973
2974 ReplaceNode(N, Ret);
2975 return true;
2976 }
2977
tryStoreParam(SDNode * N)2978 bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
2979 SDLoc DL(N);
2980 SDValue Chain = N->getOperand(0);
2981 SDValue Param = N->getOperand(1);
2982 unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2983 SDValue Offset = N->getOperand(2);
2984 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2985 MemSDNode *Mem = cast<MemSDNode>(N);
2986 SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2987
2988 // How many elements do we have?
2989 unsigned NumElts = 1;
2990 switch (N->getOpcode()) {
2991 default:
2992 return false;
2993 case NVPTXISD::StoreParamU32:
2994 case NVPTXISD::StoreParamS32:
2995 case NVPTXISD::StoreParam:
2996 NumElts = 1;
2997 break;
2998 case NVPTXISD::StoreParamV2:
2999 NumElts = 2;
3000 break;
3001 case NVPTXISD::StoreParamV4:
3002 NumElts = 4;
3003 break;
3004 }
3005
3006 // Build vector of operands
3007 SmallVector<SDValue, 8> Ops;
3008 for (unsigned i = 0; i < NumElts; ++i)
3009 Ops.push_back(N->getOperand(i + 3));
3010 Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
3011 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
3012 Ops.push_back(Chain);
3013 Ops.push_back(Flag);
3014
3015 // Determine target opcode
3016 // If we have an i1, use an 8-bit store. The lowering code in
3017 // NVPTXISelLowering will have already emitted an upcast.
3018 unsigned Opcode = 0;
3019 switch (N->getOpcode()) {
3020 default:
3021 switch (NumElts) {
3022 default:
3023 return false;
3024 case 1:
3025 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
3026 default:
3027 return false;
3028 case MVT::i1:
3029 Opcode = NVPTX::StoreParamI8;
3030 break;
3031 case MVT::i8:
3032 Opcode = NVPTX::StoreParamI8;
3033 break;
3034 case MVT::i16:
3035 Opcode = NVPTX::StoreParamI16;
3036 break;
3037 case MVT::i32:
3038 Opcode = NVPTX::StoreParamI32;
3039 break;
3040 case MVT::i64:
3041 Opcode = NVPTX::StoreParamI64;
3042 break;
3043 case MVT::f32:
3044 Opcode = NVPTX::StoreParamF32;
3045 break;
3046 case MVT::f64:
3047 Opcode = NVPTX::StoreParamF64;
3048 break;
3049 }
3050 break;
3051 case 2:
3052 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
3053 default:
3054 return false;
3055 case MVT::i1:
3056 Opcode = NVPTX::StoreParamV2I8;
3057 break;
3058 case MVT::i8:
3059 Opcode = NVPTX::StoreParamV2I8;
3060 break;
3061 case MVT::i16:
3062 Opcode = NVPTX::StoreParamV2I16;
3063 break;
3064 case MVT::i32:
3065 Opcode = NVPTX::StoreParamV2I32;
3066 break;
3067 case MVT::i64:
3068 Opcode = NVPTX::StoreParamV2I64;
3069 break;
3070 case MVT::f32:
3071 Opcode = NVPTX::StoreParamV2F32;
3072 break;
3073 case MVT::f64:
3074 Opcode = NVPTX::StoreParamV2F64;
3075 break;
3076 }
3077 break;
3078 case 4:
3079 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
3080 default:
3081 return false;
3082 case MVT::i1:
3083 Opcode = NVPTX::StoreParamV4I8;
3084 break;
3085 case MVT::i8:
3086 Opcode = NVPTX::StoreParamV4I8;
3087 break;
3088 case MVT::i16:
3089 Opcode = NVPTX::StoreParamV4I16;
3090 break;
3091 case MVT::i32:
3092 Opcode = NVPTX::StoreParamV4I32;
3093 break;
3094 case MVT::f32:
3095 Opcode = NVPTX::StoreParamV4F32;
3096 break;
3097 }
3098 break;
3099 }
3100 break;
3101 // Special case: if we have a sign-extend/zero-extend node, insert the
3102 // conversion instruction first, and use that as the value operand to
3103 // the selected StoreParam node.
3104 case NVPTXISD::StoreParamU32: {
3105 Opcode = NVPTX::StoreParamI32;
3106 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
3107 MVT::i32);
3108 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
3109 MVT::i32, Ops[0], CvtNone);
3110 Ops[0] = SDValue(Cvt, 0);
3111 break;
3112 }
3113 case NVPTXISD::StoreParamS32: {
3114 Opcode = NVPTX::StoreParamI32;
3115 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
3116 MVT::i32);
3117 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
3118 MVT::i32, Ops[0], CvtNone);
3119 Ops[0] = SDValue(Cvt, 0);
3120 break;
3121 }
3122 }
3123
3124 SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
3125 SDNode *Ret =
3126 CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
3127 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
3128 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
3129 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
3130
3131 ReplaceNode(N, Ret);
3132 return true;
3133 }
3134
tryTextureIntrinsic(SDNode * N)3135 bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) {
3136 SDValue Chain = N->getOperand(0);
3137 unsigned Opc = 0;
3138 SmallVector<SDValue, 8> Ops;
3139
3140 switch (N->getOpcode()) {
3141 default: return false;
3142 case NVPTXISD::Tex1DFloatS32:
3143 Opc = NVPTX::TEX_1D_F32_S32;
3144 break;
3145 case NVPTXISD::Tex1DFloatFloat:
3146 Opc = NVPTX::TEX_1D_F32_F32;
3147 break;
3148 case NVPTXISD::Tex1DFloatFloatLevel:
3149 Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
3150 break;
3151 case NVPTXISD::Tex1DFloatFloatGrad:
3152 Opc = NVPTX::TEX_1D_F32_F32_GRAD;
3153 break;
3154 case NVPTXISD::Tex1DS32S32:
3155 Opc = NVPTX::TEX_1D_S32_S32;
3156 break;
3157 case NVPTXISD::Tex1DS32Float:
3158 Opc = NVPTX::TEX_1D_S32_F32;
3159 break;
3160 case NVPTXISD::Tex1DS32FloatLevel:
3161 Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
3162 break;
3163 case NVPTXISD::Tex1DS32FloatGrad:
3164 Opc = NVPTX::TEX_1D_S32_F32_GRAD;
3165 break;
3166 case NVPTXISD::Tex1DU32S32:
3167 Opc = NVPTX::TEX_1D_U32_S32;
3168 break;
3169 case NVPTXISD::Tex1DU32Float:
3170 Opc = NVPTX::TEX_1D_U32_F32;
3171 break;
3172 case NVPTXISD::Tex1DU32FloatLevel:
3173 Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
3174 break;
3175 case NVPTXISD::Tex1DU32FloatGrad:
3176 Opc = NVPTX::TEX_1D_U32_F32_GRAD;
3177 break;
3178 case NVPTXISD::Tex1DArrayFloatS32:
3179 Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
3180 break;
3181 case NVPTXISD::Tex1DArrayFloatFloat:
3182 Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
3183 break;
3184 case NVPTXISD::Tex1DArrayFloatFloatLevel:
3185 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
3186 break;
3187 case NVPTXISD::Tex1DArrayFloatFloatGrad:
3188 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
3189 break;
3190 case NVPTXISD::Tex1DArrayS32S32:
3191 Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
3192 break;
3193 case NVPTXISD::Tex1DArrayS32Float:
3194 Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
3195 break;
3196 case NVPTXISD::Tex1DArrayS32FloatLevel:
3197 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
3198 break;
3199 case NVPTXISD::Tex1DArrayS32FloatGrad:
3200 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
3201 break;
3202 case NVPTXISD::Tex1DArrayU32S32:
3203 Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
3204 break;
3205 case NVPTXISD::Tex1DArrayU32Float:
3206 Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
3207 break;
3208 case NVPTXISD::Tex1DArrayU32FloatLevel:
3209 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
3210 break;
3211 case NVPTXISD::Tex1DArrayU32FloatGrad:
3212 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
3213 break;
3214 case NVPTXISD::Tex2DFloatS32:
3215 Opc = NVPTX::TEX_2D_F32_S32;
3216 break;
3217 case NVPTXISD::Tex2DFloatFloat:
3218 Opc = NVPTX::TEX_2D_F32_F32;
3219 break;
3220 case NVPTXISD::Tex2DFloatFloatLevel:
3221 Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
3222 break;
3223 case NVPTXISD::Tex2DFloatFloatGrad:
3224 Opc = NVPTX::TEX_2D_F32_F32_GRAD;
3225 break;
3226 case NVPTXISD::Tex2DS32S32:
3227 Opc = NVPTX::TEX_2D_S32_S32;
3228 break;
3229 case NVPTXISD::Tex2DS32Float:
3230 Opc = NVPTX::TEX_2D_S32_F32;
3231 break;
3232 case NVPTXISD::Tex2DS32FloatLevel:
3233 Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
3234 break;
3235 case NVPTXISD::Tex2DS32FloatGrad:
3236 Opc = NVPTX::TEX_2D_S32_F32_GRAD;
3237 break;
3238 case NVPTXISD::Tex2DU32S32:
3239 Opc = NVPTX::TEX_2D_U32_S32;
3240 break;
3241 case NVPTXISD::Tex2DU32Float:
3242 Opc = NVPTX::TEX_2D_U32_F32;
3243 break;
3244 case NVPTXISD::Tex2DU32FloatLevel:
3245 Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
3246 break;
3247 case NVPTXISD::Tex2DU32FloatGrad:
3248 Opc = NVPTX::TEX_2D_U32_F32_GRAD;
3249 break;
3250 case NVPTXISD::Tex2DArrayFloatS32:
3251 Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
3252 break;
3253 case NVPTXISD::Tex2DArrayFloatFloat:
3254 Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
3255 break;
3256 case NVPTXISD::Tex2DArrayFloatFloatLevel:
3257 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
3258 break;
3259 case NVPTXISD::Tex2DArrayFloatFloatGrad:
3260 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
3261 break;
3262 case NVPTXISD::Tex2DArrayS32S32:
3263 Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
3264 break;
3265 case NVPTXISD::Tex2DArrayS32Float:
3266 Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
3267 break;
3268 case NVPTXISD::Tex2DArrayS32FloatLevel:
3269 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
3270 break;
3271 case NVPTXISD::Tex2DArrayS32FloatGrad:
3272 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
3273 break;
3274 case NVPTXISD::Tex2DArrayU32S32:
3275 Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
3276 break;
3277 case NVPTXISD::Tex2DArrayU32Float:
3278 Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
3279 break;
3280 case NVPTXISD::Tex2DArrayU32FloatLevel:
3281 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
3282 break;
3283 case NVPTXISD::Tex2DArrayU32FloatGrad:
3284 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
3285 break;
3286 case NVPTXISD::Tex3DFloatS32:
3287 Opc = NVPTX::TEX_3D_F32_S32;
3288 break;
3289 case NVPTXISD::Tex3DFloatFloat:
3290 Opc = NVPTX::TEX_3D_F32_F32;
3291 break;
3292 case NVPTXISD::Tex3DFloatFloatLevel:
3293 Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
3294 break;
3295 case NVPTXISD::Tex3DFloatFloatGrad:
3296 Opc = NVPTX::TEX_3D_F32_F32_GRAD;
3297 break;
3298 case NVPTXISD::Tex3DS32S32:
3299 Opc = NVPTX::TEX_3D_S32_S32;
3300 break;
3301 case NVPTXISD::Tex3DS32Float:
3302 Opc = NVPTX::TEX_3D_S32_F32;
3303 break;
3304 case NVPTXISD::Tex3DS32FloatLevel:
3305 Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
3306 break;
3307 case NVPTXISD::Tex3DS32FloatGrad:
3308 Opc = NVPTX::TEX_3D_S32_F32_GRAD;
3309 break;
3310 case NVPTXISD::Tex3DU32S32:
3311 Opc = NVPTX::TEX_3D_U32_S32;
3312 break;
3313 case NVPTXISD::Tex3DU32Float:
3314 Opc = NVPTX::TEX_3D_U32_F32;
3315 break;
3316 case NVPTXISD::Tex3DU32FloatLevel:
3317 Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
3318 break;
3319 case NVPTXISD::Tex3DU32FloatGrad:
3320 Opc = NVPTX::TEX_3D_U32_F32_GRAD;
3321 break;
3322 case NVPTXISD::TexCubeFloatFloat:
3323 Opc = NVPTX::TEX_CUBE_F32_F32;
3324 break;
3325 case NVPTXISD::TexCubeFloatFloatLevel:
3326 Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
3327 break;
3328 case NVPTXISD::TexCubeS32Float:
3329 Opc = NVPTX::TEX_CUBE_S32_F32;
3330 break;
3331 case NVPTXISD::TexCubeS32FloatLevel:
3332 Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
3333 break;
3334 case NVPTXISD::TexCubeU32Float:
3335 Opc = NVPTX::TEX_CUBE_U32_F32;
3336 break;
3337 case NVPTXISD::TexCubeU32FloatLevel:
3338 Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
3339 break;
3340 case NVPTXISD::TexCubeArrayFloatFloat:
3341 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
3342 break;
3343 case NVPTXISD::TexCubeArrayFloatFloatLevel:
3344 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
3345 break;
3346 case NVPTXISD::TexCubeArrayS32Float:
3347 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
3348 break;
3349 case NVPTXISD::TexCubeArrayS32FloatLevel:
3350 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
3351 break;
3352 case NVPTXISD::TexCubeArrayU32Float:
3353 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
3354 break;
3355 case NVPTXISD::TexCubeArrayU32FloatLevel:
3356 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
3357 break;
3358 case NVPTXISD::Tld4R2DFloatFloat:
3359 Opc = NVPTX::TLD4_R_2D_F32_F32;
3360 break;
3361 case NVPTXISD::Tld4G2DFloatFloat:
3362 Opc = NVPTX::TLD4_G_2D_F32_F32;
3363 break;
3364 case NVPTXISD::Tld4B2DFloatFloat:
3365 Opc = NVPTX::TLD4_B_2D_F32_F32;
3366 break;
3367 case NVPTXISD::Tld4A2DFloatFloat:
3368 Opc = NVPTX::TLD4_A_2D_F32_F32;
3369 break;
3370 case NVPTXISD::Tld4R2DS64Float:
3371 Opc = NVPTX::TLD4_R_2D_S32_F32;
3372 break;
3373 case NVPTXISD::Tld4G2DS64Float:
3374 Opc = NVPTX::TLD4_G_2D_S32_F32;
3375 break;
3376 case NVPTXISD::Tld4B2DS64Float:
3377 Opc = NVPTX::TLD4_B_2D_S32_F32;
3378 break;
3379 case NVPTXISD::Tld4A2DS64Float:
3380 Opc = NVPTX::TLD4_A_2D_S32_F32;
3381 break;
3382 case NVPTXISD::Tld4R2DU64Float:
3383 Opc = NVPTX::TLD4_R_2D_U32_F32;
3384 break;
3385 case NVPTXISD::Tld4G2DU64Float:
3386 Opc = NVPTX::TLD4_G_2D_U32_F32;
3387 break;
3388 case NVPTXISD::Tld4B2DU64Float:
3389 Opc = NVPTX::TLD4_B_2D_U32_F32;
3390 break;
3391 case NVPTXISD::Tld4A2DU64Float:
3392 Opc = NVPTX::TLD4_A_2D_U32_F32;
3393 break;
3394 case NVPTXISD::TexUnified1DFloatS32:
3395 Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
3396 break;
3397 case NVPTXISD::TexUnified1DFloatFloat:
3398 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
3399 break;
3400 case NVPTXISD::TexUnified1DFloatFloatLevel:
3401 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
3402 break;
3403 case NVPTXISD::TexUnified1DFloatFloatGrad:
3404 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
3405 break;
3406 case NVPTXISD::TexUnified1DS32S32:
3407 Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
3408 break;
3409 case NVPTXISD::TexUnified1DS32Float:
3410 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
3411 break;
3412 case NVPTXISD::TexUnified1DS32FloatLevel:
3413 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
3414 break;
3415 case NVPTXISD::TexUnified1DS32FloatGrad:
3416 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
3417 break;
3418 case NVPTXISD::TexUnified1DU32S32:
3419 Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
3420 break;
3421 case NVPTXISD::TexUnified1DU32Float:
3422 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
3423 break;
3424 case NVPTXISD::TexUnified1DU32FloatLevel:
3425 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
3426 break;
3427 case NVPTXISD::TexUnified1DU32FloatGrad:
3428 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
3429 break;
3430 case NVPTXISD::TexUnified1DArrayFloatS32:
3431 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
3432 break;
3433 case NVPTXISD::TexUnified1DArrayFloatFloat:
3434 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
3435 break;
3436 case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
3437 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
3438 break;
3439 case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
3440 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
3441 break;
3442 case NVPTXISD::TexUnified1DArrayS32S32:
3443 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
3444 break;
3445 case NVPTXISD::TexUnified1DArrayS32Float:
3446 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
3447 break;
3448 case NVPTXISD::TexUnified1DArrayS32FloatLevel:
3449 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
3450 break;
3451 case NVPTXISD::TexUnified1DArrayS32FloatGrad:
3452 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
3453 break;
3454 case NVPTXISD::TexUnified1DArrayU32S32:
3455 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
3456 break;
3457 case NVPTXISD::TexUnified1DArrayU32Float:
3458 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
3459 break;
3460 case NVPTXISD::TexUnified1DArrayU32FloatLevel:
3461 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
3462 break;
3463 case NVPTXISD::TexUnified1DArrayU32FloatGrad:
3464 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
3465 break;
3466 case NVPTXISD::TexUnified2DFloatS32:
3467 Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
3468 break;
3469 case NVPTXISD::TexUnified2DFloatFloat:
3470 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
3471 break;
3472 case NVPTXISD::TexUnified2DFloatFloatLevel:
3473 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
3474 break;
3475 case NVPTXISD::TexUnified2DFloatFloatGrad:
3476 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
3477 break;
3478 case NVPTXISD::TexUnified2DS32S32:
3479 Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
3480 break;
3481 case NVPTXISD::TexUnified2DS32Float:
3482 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
3483 break;
3484 case NVPTXISD::TexUnified2DS32FloatLevel:
3485 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
3486 break;
3487 case NVPTXISD::TexUnified2DS32FloatGrad:
3488 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
3489 break;
3490 case NVPTXISD::TexUnified2DU32S32:
3491 Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
3492 break;
3493 case NVPTXISD::TexUnified2DU32Float:
3494 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
3495 break;
3496 case NVPTXISD::TexUnified2DU32FloatLevel:
3497 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
3498 break;
3499 case NVPTXISD::TexUnified2DU32FloatGrad:
3500 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
3501 break;
3502 case NVPTXISD::TexUnified2DArrayFloatS32:
3503 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
3504 break;
3505 case NVPTXISD::TexUnified2DArrayFloatFloat:
3506 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
3507 break;
3508 case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
3509 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
3510 break;
3511 case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
3512 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
3513 break;
3514 case NVPTXISD::TexUnified2DArrayS32S32:
3515 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
3516 break;
3517 case NVPTXISD::TexUnified2DArrayS32Float:
3518 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
3519 break;
3520 case NVPTXISD::TexUnified2DArrayS32FloatLevel:
3521 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
3522 break;
3523 case NVPTXISD::TexUnified2DArrayS32FloatGrad:
3524 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
3525 break;
3526 case NVPTXISD::TexUnified2DArrayU32S32:
3527 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
3528 break;
3529 case NVPTXISD::TexUnified2DArrayU32Float:
3530 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
3531 break;
3532 case NVPTXISD::TexUnified2DArrayU32FloatLevel:
3533 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
3534 break;
3535 case NVPTXISD::TexUnified2DArrayU32FloatGrad:
3536 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
3537 break;
3538 case NVPTXISD::TexUnified3DFloatS32:
3539 Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
3540 break;
3541 case NVPTXISD::TexUnified3DFloatFloat:
3542 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
3543 break;
3544 case NVPTXISD::TexUnified3DFloatFloatLevel:
3545 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
3546 break;
3547 case NVPTXISD::TexUnified3DFloatFloatGrad:
3548 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
3549 break;
3550 case NVPTXISD::TexUnified3DS32S32:
3551 Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
3552 break;
3553 case NVPTXISD::TexUnified3DS32Float:
3554 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
3555 break;
3556 case NVPTXISD::TexUnified3DS32FloatLevel:
3557 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
3558 break;
3559 case NVPTXISD::TexUnified3DS32FloatGrad:
3560 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
3561 break;
3562 case NVPTXISD::TexUnified3DU32S32:
3563 Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
3564 break;
3565 case NVPTXISD::TexUnified3DU32Float:
3566 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
3567 break;
3568 case NVPTXISD::TexUnified3DU32FloatLevel:
3569 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
3570 break;
3571 case NVPTXISD::TexUnified3DU32FloatGrad:
3572 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
3573 break;
3574 case NVPTXISD::TexUnifiedCubeFloatFloat:
3575 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
3576 break;
3577 case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
3578 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
3579 break;
3580 case NVPTXISD::TexUnifiedCubeS32Float:
3581 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
3582 break;
3583 case NVPTXISD::TexUnifiedCubeS32FloatLevel:
3584 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
3585 break;
3586 case NVPTXISD::TexUnifiedCubeU32Float:
3587 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
3588 break;
3589 case NVPTXISD::TexUnifiedCubeU32FloatLevel:
3590 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
3591 break;
3592 case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
3593 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
3594 break;
3595 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
3596 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
3597 break;
3598 case NVPTXISD::TexUnifiedCubeArrayS32Float:
3599 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
3600 break;
3601 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
3602 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
3603 break;
3604 case NVPTXISD::TexUnifiedCubeArrayU32Float:
3605 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
3606 break;
3607 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
3608 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
3609 break;
3610 case NVPTXISD::Tld4UnifiedR2DFloatFloat:
3611 Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
3612 break;
3613 case NVPTXISD::Tld4UnifiedG2DFloatFloat:
3614 Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
3615 break;
3616 case NVPTXISD::Tld4UnifiedB2DFloatFloat:
3617 Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
3618 break;
3619 case NVPTXISD::Tld4UnifiedA2DFloatFloat:
3620 Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
3621 break;
3622 case NVPTXISD::Tld4UnifiedR2DS64Float:
3623 Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
3624 break;
3625 case NVPTXISD::Tld4UnifiedG2DS64Float:
3626 Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
3627 break;
3628 case NVPTXISD::Tld4UnifiedB2DS64Float:
3629 Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
3630 break;
3631 case NVPTXISD::Tld4UnifiedA2DS64Float:
3632 Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
3633 break;
3634 case NVPTXISD::Tld4UnifiedR2DU64Float:
3635 Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
3636 break;
3637 case NVPTXISD::Tld4UnifiedG2DU64Float:
3638 Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
3639 break;
3640 case NVPTXISD::Tld4UnifiedB2DU64Float:
3641 Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
3642 break;
3643 case NVPTXISD::Tld4UnifiedA2DU64Float:
3644 Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
3645 break;
3646 }
3647
3648 // Copy over operands
3649 for (unsigned i = 1; i < N->getNumOperands(); ++i) {
3650 Ops.push_back(N->getOperand(i));
3651 }
3652
3653 Ops.push_back(Chain);
3654 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
3655 return true;
3656 }
3657
trySurfaceIntrinsic(SDNode * N)3658 bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(SDNode *N) {
3659 SDValue Chain = N->getOperand(0);
3660 SDValue TexHandle = N->getOperand(1);
3661 unsigned Opc = 0;
3662 SmallVector<SDValue, 8> Ops;
3663 switch (N->getOpcode()) {
3664 default: return false;
3665 case NVPTXISD::Suld1DI8Clamp:
3666 Opc = NVPTX::SULD_1D_I8_CLAMP;
3667 Ops.push_back(TexHandle);
3668 Ops.push_back(N->getOperand(2));
3669 Ops.push_back(Chain);
3670 break;
3671 case NVPTXISD::Suld1DI16Clamp:
3672 Opc = NVPTX::SULD_1D_I16_CLAMP;
3673 Ops.push_back(TexHandle);
3674 Ops.push_back(N->getOperand(2));
3675 Ops.push_back(Chain);
3676 break;
3677 case NVPTXISD::Suld1DI32Clamp:
3678 Opc = NVPTX::SULD_1D_I32_CLAMP;
3679 Ops.push_back(TexHandle);
3680 Ops.push_back(N->getOperand(2));
3681 Ops.push_back(Chain);
3682 break;
3683 case NVPTXISD::Suld1DI64Clamp:
3684 Opc = NVPTX::SULD_1D_I64_CLAMP;
3685 Ops.push_back(TexHandle);
3686 Ops.push_back(N->getOperand(2));
3687 Ops.push_back(Chain);
3688 break;
3689 case NVPTXISD::Suld1DV2I8Clamp:
3690 Opc = NVPTX::SULD_1D_V2I8_CLAMP;
3691 Ops.push_back(TexHandle);
3692 Ops.push_back(N->getOperand(2));
3693 Ops.push_back(Chain);
3694 break;
3695 case NVPTXISD::Suld1DV2I16Clamp:
3696 Opc = NVPTX::SULD_1D_V2I16_CLAMP;
3697 Ops.push_back(TexHandle);
3698 Ops.push_back(N->getOperand(2));
3699 Ops.push_back(Chain);
3700 break;
3701 case NVPTXISD::Suld1DV2I32Clamp:
3702 Opc = NVPTX::SULD_1D_V2I32_CLAMP;
3703 Ops.push_back(TexHandle);
3704 Ops.push_back(N->getOperand(2));
3705 Ops.push_back(Chain);
3706 break;
3707 case NVPTXISD::Suld1DV2I64Clamp:
3708 Opc = NVPTX::SULD_1D_V2I64_CLAMP;
3709 Ops.push_back(TexHandle);
3710 Ops.push_back(N->getOperand(2));
3711 Ops.push_back(Chain);
3712 break;
3713 case NVPTXISD::Suld1DV4I8Clamp:
3714 Opc = NVPTX::SULD_1D_V4I8_CLAMP;
3715 Ops.push_back(TexHandle);
3716 Ops.push_back(N->getOperand(2));
3717 Ops.push_back(Chain);
3718 break;
3719 case NVPTXISD::Suld1DV4I16Clamp:
3720 Opc = NVPTX::SULD_1D_V4I16_CLAMP;
3721 Ops.push_back(TexHandle);
3722 Ops.push_back(N->getOperand(2));
3723 Ops.push_back(Chain);
3724 break;
3725 case NVPTXISD::Suld1DV4I32Clamp:
3726 Opc = NVPTX::SULD_1D_V4I32_CLAMP;
3727 Ops.push_back(TexHandle);
3728 Ops.push_back(N->getOperand(2));
3729 Ops.push_back(Chain);
3730 break;
3731 case NVPTXISD::Suld1DArrayI8Clamp:
3732 Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
3733 Ops.push_back(TexHandle);
3734 Ops.push_back(N->getOperand(2));
3735 Ops.push_back(N->getOperand(3));
3736 Ops.push_back(Chain);
3737 break;
3738 case NVPTXISD::Suld1DArrayI16Clamp:
3739 Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
3740 Ops.push_back(TexHandle);
3741 Ops.push_back(N->getOperand(2));
3742 Ops.push_back(N->getOperand(3));
3743 Ops.push_back(Chain);
3744 break;
3745 case NVPTXISD::Suld1DArrayI32Clamp:
3746 Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
3747 Ops.push_back(TexHandle);
3748 Ops.push_back(N->getOperand(2));
3749 Ops.push_back(N->getOperand(3));
3750 Ops.push_back(Chain);
3751 break;
3752 case NVPTXISD::Suld1DArrayI64Clamp:
3753 Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
3754 Ops.push_back(TexHandle);
3755 Ops.push_back(N->getOperand(2));
3756 Ops.push_back(N->getOperand(3));
3757 Ops.push_back(Chain);
3758 break;
3759 case NVPTXISD::Suld1DArrayV2I8Clamp:
3760 Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
3761 Ops.push_back(TexHandle);
3762 Ops.push_back(N->getOperand(2));
3763 Ops.push_back(N->getOperand(3));
3764 Ops.push_back(Chain);
3765 break;
3766 case NVPTXISD::Suld1DArrayV2I16Clamp:
3767 Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
3768 Ops.push_back(TexHandle);
3769 Ops.push_back(N->getOperand(2));
3770 Ops.push_back(N->getOperand(3));
3771 Ops.push_back(Chain);
3772 break;
3773 case NVPTXISD::Suld1DArrayV2I32Clamp:
3774 Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
3775 Ops.push_back(TexHandle);
3776 Ops.push_back(N->getOperand(2));
3777 Ops.push_back(N->getOperand(3));
3778 Ops.push_back(Chain);
3779 break;
3780 case NVPTXISD::Suld1DArrayV2I64Clamp:
3781 Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
3782 Ops.push_back(TexHandle);
3783 Ops.push_back(N->getOperand(2));
3784 Ops.push_back(N->getOperand(3));
3785 Ops.push_back(Chain);
3786 break;
3787 case NVPTXISD::Suld1DArrayV4I8Clamp:
3788 Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
3789 Ops.push_back(TexHandle);
3790 Ops.push_back(N->getOperand(2));
3791 Ops.push_back(N->getOperand(3));
3792 Ops.push_back(Chain);
3793 break;
3794 case NVPTXISD::Suld1DArrayV4I16Clamp:
3795 Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
3796 Ops.push_back(TexHandle);
3797 Ops.push_back(N->getOperand(2));
3798 Ops.push_back(N->getOperand(3));
3799 Ops.push_back(Chain);
3800 break;
3801 case NVPTXISD::Suld1DArrayV4I32Clamp:
3802 Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
3803 Ops.push_back(TexHandle);
3804 Ops.push_back(N->getOperand(2));
3805 Ops.push_back(N->getOperand(3));
3806 Ops.push_back(Chain);
3807 break;
3808 case NVPTXISD::Suld2DI8Clamp:
3809 Opc = NVPTX::SULD_2D_I8_CLAMP;
3810 Ops.push_back(TexHandle);
3811 Ops.push_back(N->getOperand(2));
3812 Ops.push_back(N->getOperand(3));
3813 Ops.push_back(Chain);
3814 break;
3815 case NVPTXISD::Suld2DI16Clamp:
3816 Opc = NVPTX::SULD_2D_I16_CLAMP;
3817 Ops.push_back(TexHandle);
3818 Ops.push_back(N->getOperand(2));
3819 Ops.push_back(N->getOperand(3));
3820 Ops.push_back(Chain);
3821 break;
3822 case NVPTXISD::Suld2DI32Clamp:
3823 Opc = NVPTX::SULD_2D_I32_CLAMP;
3824 Ops.push_back(TexHandle);
3825 Ops.push_back(N->getOperand(2));
3826 Ops.push_back(N->getOperand(3));
3827 Ops.push_back(Chain);
3828 break;
3829 case NVPTXISD::Suld2DI64Clamp:
3830 Opc = NVPTX::SULD_2D_I64_CLAMP;
3831 Ops.push_back(TexHandle);
3832 Ops.push_back(N->getOperand(2));
3833 Ops.push_back(N->getOperand(3));
3834 Ops.push_back(Chain);
3835 break;
3836 case NVPTXISD::Suld2DV2I8Clamp:
3837 Opc = NVPTX::SULD_2D_V2I8_CLAMP;
3838 Ops.push_back(TexHandle);
3839 Ops.push_back(N->getOperand(2));
3840 Ops.push_back(N->getOperand(3));
3841 Ops.push_back(Chain);
3842 break;
3843 case NVPTXISD::Suld2DV2I16Clamp:
3844 Opc = NVPTX::SULD_2D_V2I16_CLAMP;
3845 Ops.push_back(TexHandle);
3846 Ops.push_back(N->getOperand(2));
3847 Ops.push_back(N->getOperand(3));
3848 Ops.push_back(Chain);
3849 break;
3850 case NVPTXISD::Suld2DV2I32Clamp:
3851 Opc = NVPTX::SULD_2D_V2I32_CLAMP;
3852 Ops.push_back(TexHandle);
3853 Ops.push_back(N->getOperand(2));
3854 Ops.push_back(N->getOperand(3));
3855 Ops.push_back(Chain);
3856 break;
3857 case NVPTXISD::Suld2DV2I64Clamp:
3858 Opc = NVPTX::SULD_2D_V2I64_CLAMP;
3859 Ops.push_back(TexHandle);
3860 Ops.push_back(N->getOperand(2));
3861 Ops.push_back(N->getOperand(3));
3862 Ops.push_back(Chain);
3863 break;
3864 case NVPTXISD::Suld2DV4I8Clamp:
3865 Opc = NVPTX::SULD_2D_V4I8_CLAMP;
3866 Ops.push_back(TexHandle);
3867 Ops.push_back(N->getOperand(2));
3868 Ops.push_back(N->getOperand(3));
3869 Ops.push_back(Chain);
3870 break;
3871 case NVPTXISD::Suld2DV4I16Clamp:
3872 Opc = NVPTX::SULD_2D_V4I16_CLAMP;
3873 Ops.push_back(TexHandle);
3874 Ops.push_back(N->getOperand(2));
3875 Ops.push_back(N->getOperand(3));
3876 Ops.push_back(Chain);
3877 break;
3878 case NVPTXISD::Suld2DV4I32Clamp:
3879 Opc = NVPTX::SULD_2D_V4I32_CLAMP;
3880 Ops.push_back(TexHandle);
3881 Ops.push_back(N->getOperand(2));
3882 Ops.push_back(N->getOperand(3));
3883 Ops.push_back(Chain);
3884 break;
3885 case NVPTXISD::Suld2DArrayI8Clamp:
3886 Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
3887 Ops.push_back(TexHandle);
3888 Ops.push_back(N->getOperand(2));
3889 Ops.push_back(N->getOperand(3));
3890 Ops.push_back(N->getOperand(4));
3891 Ops.push_back(Chain);
3892 break;
3893 case NVPTXISD::Suld2DArrayI16Clamp:
3894 Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
3895 Ops.push_back(TexHandle);
3896 Ops.push_back(N->getOperand(2));
3897 Ops.push_back(N->getOperand(3));
3898 Ops.push_back(N->getOperand(4));
3899 Ops.push_back(Chain);
3900 break;
3901 case NVPTXISD::Suld2DArrayI32Clamp:
3902 Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
3903 Ops.push_back(TexHandle);
3904 Ops.push_back(N->getOperand(2));
3905 Ops.push_back(N->getOperand(3));
3906 Ops.push_back(N->getOperand(4));
3907 Ops.push_back(Chain);
3908 break;
3909 case NVPTXISD::Suld2DArrayI64Clamp:
3910 Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
3911 Ops.push_back(TexHandle);
3912 Ops.push_back(N->getOperand(2));
3913 Ops.push_back(N->getOperand(3));
3914 Ops.push_back(N->getOperand(4));
3915 Ops.push_back(Chain);
3916 break;
3917 case NVPTXISD::Suld2DArrayV2I8Clamp:
3918 Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
3919 Ops.push_back(TexHandle);
3920 Ops.push_back(N->getOperand(2));
3921 Ops.push_back(N->getOperand(3));
3922 Ops.push_back(N->getOperand(4));
3923 Ops.push_back(Chain);
3924 break;
3925 case NVPTXISD::Suld2DArrayV2I16Clamp:
3926 Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
3927 Ops.push_back(TexHandle);
3928 Ops.push_back(N->getOperand(2));
3929 Ops.push_back(N->getOperand(3));
3930 Ops.push_back(N->getOperand(4));
3931 Ops.push_back(Chain);
3932 break;
3933 case NVPTXISD::Suld2DArrayV2I32Clamp:
3934 Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
3935 Ops.push_back(TexHandle);
3936 Ops.push_back(N->getOperand(2));
3937 Ops.push_back(N->getOperand(3));
3938 Ops.push_back(N->getOperand(4));
3939 Ops.push_back(Chain);
3940 break;
3941 case NVPTXISD::Suld2DArrayV2I64Clamp:
3942 Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
3943 Ops.push_back(TexHandle);
3944 Ops.push_back(N->getOperand(2));
3945 Ops.push_back(N->getOperand(3));
3946 Ops.push_back(N->getOperand(4));
3947 Ops.push_back(Chain);
3948 break;
3949 case NVPTXISD::Suld2DArrayV4I8Clamp:
3950 Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
3951 Ops.push_back(TexHandle);
3952 Ops.push_back(N->getOperand(2));
3953 Ops.push_back(N->getOperand(3));
3954 Ops.push_back(N->getOperand(4));
3955 Ops.push_back(Chain);
3956 break;
3957 case NVPTXISD::Suld2DArrayV4I16Clamp:
3958 Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
3959 Ops.push_back(TexHandle);
3960 Ops.push_back(N->getOperand(2));
3961 Ops.push_back(N->getOperand(3));
3962 Ops.push_back(N->getOperand(4));
3963 Ops.push_back(Chain);
3964 break;
3965 case NVPTXISD::Suld2DArrayV4I32Clamp:
3966 Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
3967 Ops.push_back(TexHandle);
3968 Ops.push_back(N->getOperand(2));
3969 Ops.push_back(N->getOperand(3));
3970 Ops.push_back(N->getOperand(4));
3971 Ops.push_back(Chain);
3972 break;
3973 case NVPTXISD::Suld3DI8Clamp:
3974 Opc = NVPTX::SULD_3D_I8_CLAMP;
3975 Ops.push_back(TexHandle);
3976 Ops.push_back(N->getOperand(2));
3977 Ops.push_back(N->getOperand(3));
3978 Ops.push_back(N->getOperand(4));
3979 Ops.push_back(Chain);
3980 break;
3981 case NVPTXISD::Suld3DI16Clamp:
3982 Opc = NVPTX::SULD_3D_I16_CLAMP;
3983 Ops.push_back(TexHandle);
3984 Ops.push_back(N->getOperand(2));
3985 Ops.push_back(N->getOperand(3));
3986 Ops.push_back(N->getOperand(4));
3987 Ops.push_back(Chain);
3988 break;
3989 case NVPTXISD::Suld3DI32Clamp:
3990 Opc = NVPTX::SULD_3D_I32_CLAMP;
3991 Ops.push_back(TexHandle);
3992 Ops.push_back(N->getOperand(2));
3993 Ops.push_back(N->getOperand(3));
3994 Ops.push_back(N->getOperand(4));
3995 Ops.push_back(Chain);
3996 break;
3997 case NVPTXISD::Suld3DI64Clamp:
3998 Opc = NVPTX::SULD_3D_I64_CLAMP;
3999 Ops.push_back(TexHandle);
4000 Ops.push_back(N->getOperand(2));
4001 Ops.push_back(N->getOperand(3));
4002 Ops.push_back(N->getOperand(4));
4003 Ops.push_back(Chain);
4004 break;
4005 case NVPTXISD::Suld3DV2I8Clamp:
4006 Opc = NVPTX::SULD_3D_V2I8_CLAMP;
4007 Ops.push_back(TexHandle);
4008 Ops.push_back(N->getOperand(2));
4009 Ops.push_back(N->getOperand(3));
4010 Ops.push_back(N->getOperand(4));
4011 Ops.push_back(Chain);
4012 break;
4013 case NVPTXISD::Suld3DV2I16Clamp:
4014 Opc = NVPTX::SULD_3D_V2I16_CLAMP;
4015 Ops.push_back(TexHandle);
4016 Ops.push_back(N->getOperand(2));
4017 Ops.push_back(N->getOperand(3));
4018 Ops.push_back(N->getOperand(4));
4019 Ops.push_back(Chain);
4020 break;
4021 case NVPTXISD::Suld3DV2I32Clamp:
4022 Opc = NVPTX::SULD_3D_V2I32_CLAMP;
4023 Ops.push_back(TexHandle);
4024 Ops.push_back(N->getOperand(2));
4025 Ops.push_back(N->getOperand(3));
4026 Ops.push_back(N->getOperand(4));
4027 Ops.push_back(Chain);
4028 break;
4029 case NVPTXISD::Suld3DV2I64Clamp:
4030 Opc = NVPTX::SULD_3D_V2I64_CLAMP;
4031 Ops.push_back(TexHandle);
4032 Ops.push_back(N->getOperand(2));
4033 Ops.push_back(N->getOperand(3));
4034 Ops.push_back(N->getOperand(4));
4035 Ops.push_back(Chain);
4036 break;
4037 case NVPTXISD::Suld3DV4I8Clamp:
4038 Opc = NVPTX::SULD_3D_V4I8_CLAMP;
4039 Ops.push_back(TexHandle);
4040 Ops.push_back(N->getOperand(2));
4041 Ops.push_back(N->getOperand(3));
4042 Ops.push_back(N->getOperand(4));
4043 Ops.push_back(Chain);
4044 break;
4045 case NVPTXISD::Suld3DV4I16Clamp:
4046 Opc = NVPTX::SULD_3D_V4I16_CLAMP;
4047 Ops.push_back(TexHandle);
4048 Ops.push_back(N->getOperand(2));
4049 Ops.push_back(N->getOperand(3));
4050 Ops.push_back(N->getOperand(4));
4051 Ops.push_back(Chain);
4052 break;
4053 case NVPTXISD::Suld3DV4I32Clamp:
4054 Opc = NVPTX::SULD_3D_V4I32_CLAMP;
4055 Ops.push_back(TexHandle);
4056 Ops.push_back(N->getOperand(2));
4057 Ops.push_back(N->getOperand(3));
4058 Ops.push_back(N->getOperand(4));
4059 Ops.push_back(Chain);
4060 break;
4061 case NVPTXISD::Suld1DI8Trap:
4062 Opc = NVPTX::SULD_1D_I8_TRAP;
4063 Ops.push_back(TexHandle);
4064 Ops.push_back(N->getOperand(2));
4065 Ops.push_back(Chain);
4066 break;
4067 case NVPTXISD::Suld1DI16Trap:
4068 Opc = NVPTX::SULD_1D_I16_TRAP;
4069 Ops.push_back(TexHandle);
4070 Ops.push_back(N->getOperand(2));
4071 Ops.push_back(Chain);
4072 break;
4073 case NVPTXISD::Suld1DI32Trap:
4074 Opc = NVPTX::SULD_1D_I32_TRAP;
4075 Ops.push_back(TexHandle);
4076 Ops.push_back(N->getOperand(2));
4077 Ops.push_back(Chain);
4078 break;
4079 case NVPTXISD::Suld1DI64Trap:
4080 Opc = NVPTX::SULD_1D_I64_TRAP;
4081 Ops.push_back(TexHandle);
4082 Ops.push_back(N->getOperand(2));
4083 Ops.push_back(Chain);
4084 break;
4085 case NVPTXISD::Suld1DV2I8Trap:
4086 Opc = NVPTX::SULD_1D_V2I8_TRAP;
4087 Ops.push_back(TexHandle);
4088 Ops.push_back(N->getOperand(2));
4089 Ops.push_back(Chain);
4090 break;
4091 case NVPTXISD::Suld1DV2I16Trap:
4092 Opc = NVPTX::SULD_1D_V2I16_TRAP;
4093 Ops.push_back(TexHandle);
4094 Ops.push_back(N->getOperand(2));
4095 Ops.push_back(Chain);
4096 break;
4097 case NVPTXISD::Suld1DV2I32Trap:
4098 Opc = NVPTX::SULD_1D_V2I32_TRAP;
4099 Ops.push_back(TexHandle);
4100 Ops.push_back(N->getOperand(2));
4101 Ops.push_back(Chain);
4102 break;
4103 case NVPTXISD::Suld1DV2I64Trap:
4104 Opc = NVPTX::SULD_1D_V2I64_TRAP;
4105 Ops.push_back(TexHandle);
4106 Ops.push_back(N->getOperand(2));
4107 Ops.push_back(Chain);
4108 break;
4109 case NVPTXISD::Suld1DV4I8Trap:
4110 Opc = NVPTX::SULD_1D_V4I8_TRAP;
4111 Ops.push_back(TexHandle);
4112 Ops.push_back(N->getOperand(2));
4113 Ops.push_back(Chain);
4114 break;
4115 case NVPTXISD::Suld1DV4I16Trap:
4116 Opc = NVPTX::SULD_1D_V4I16_TRAP;
4117 Ops.push_back(TexHandle);
4118 Ops.push_back(N->getOperand(2));
4119 Ops.push_back(Chain);
4120 break;
4121 case NVPTXISD::Suld1DV4I32Trap:
4122 Opc = NVPTX::SULD_1D_V4I32_TRAP;
4123 Ops.push_back(TexHandle);
4124 Ops.push_back(N->getOperand(2));
4125 Ops.push_back(Chain);
4126 break;
4127 case NVPTXISD::Suld1DArrayI8Trap:
4128 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
4129 Ops.push_back(TexHandle);
4130 Ops.push_back(N->getOperand(2));
4131 Ops.push_back(N->getOperand(3));
4132 Ops.push_back(Chain);
4133 break;
4134 case NVPTXISD::Suld1DArrayI16Trap:
4135 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
4136 Ops.push_back(TexHandle);
4137 Ops.push_back(N->getOperand(2));
4138 Ops.push_back(N->getOperand(3));
4139 Ops.push_back(Chain);
4140 break;
4141 case NVPTXISD::Suld1DArrayI32Trap:
4142 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
4143 Ops.push_back(TexHandle);
4144 Ops.push_back(N->getOperand(2));
4145 Ops.push_back(N->getOperand(3));
4146 Ops.push_back(Chain);
4147 break;
4148 case NVPTXISD::Suld1DArrayI64Trap:
4149 Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
4150 Ops.push_back(TexHandle);
4151 Ops.push_back(N->getOperand(2));
4152 Ops.push_back(N->getOperand(3));
4153 Ops.push_back(Chain);
4154 break;
4155 case NVPTXISD::Suld1DArrayV2I8Trap:
4156 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
4157 Ops.push_back(TexHandle);
4158 Ops.push_back(N->getOperand(2));
4159 Ops.push_back(N->getOperand(3));
4160 Ops.push_back(Chain);
4161 break;
4162 case NVPTXISD::Suld1DArrayV2I16Trap:
4163 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
4164 Ops.push_back(TexHandle);
4165 Ops.push_back(N->getOperand(2));
4166 Ops.push_back(N->getOperand(3));
4167 Ops.push_back(Chain);
4168 break;
4169 case NVPTXISD::Suld1DArrayV2I32Trap:
4170 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
4171 Ops.push_back(TexHandle);
4172 Ops.push_back(N->getOperand(2));
4173 Ops.push_back(N->getOperand(3));
4174 Ops.push_back(Chain);
4175 break;
4176 case NVPTXISD::Suld1DArrayV2I64Trap:
4177 Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
4178 Ops.push_back(TexHandle);
4179 Ops.push_back(N->getOperand(2));
4180 Ops.push_back(N->getOperand(3));
4181 Ops.push_back(Chain);
4182 break;
4183 case NVPTXISD::Suld1DArrayV4I8Trap:
4184 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
4185 Ops.push_back(TexHandle);
4186 Ops.push_back(N->getOperand(2));
4187 Ops.push_back(N->getOperand(3));
4188 Ops.push_back(Chain);
4189 break;
4190 case NVPTXISD::Suld1DArrayV4I16Trap:
4191 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
4192 Ops.push_back(TexHandle);
4193 Ops.push_back(N->getOperand(2));
4194 Ops.push_back(N->getOperand(3));
4195 Ops.push_back(Chain);
4196 break;
4197 case NVPTXISD::Suld1DArrayV4I32Trap:
4198 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
4199 Ops.push_back(TexHandle);
4200 Ops.push_back(N->getOperand(2));
4201 Ops.push_back(N->getOperand(3));
4202 Ops.push_back(Chain);
4203 break;
4204 case NVPTXISD::Suld2DI8Trap:
4205 Opc = NVPTX::SULD_2D_I8_TRAP;
4206 Ops.push_back(TexHandle);
4207 Ops.push_back(N->getOperand(2));
4208 Ops.push_back(N->getOperand(3));
4209 Ops.push_back(Chain);
4210 break;
4211 case NVPTXISD::Suld2DI16Trap:
4212 Opc = NVPTX::SULD_2D_I16_TRAP;
4213 Ops.push_back(TexHandle);
4214 Ops.push_back(N->getOperand(2));
4215 Ops.push_back(N->getOperand(3));
4216 Ops.push_back(Chain);
4217 break;
4218 case NVPTXISD::Suld2DI32Trap:
4219 Opc = NVPTX::SULD_2D_I32_TRAP;
4220 Ops.push_back(TexHandle);
4221 Ops.push_back(N->getOperand(2));
4222 Ops.push_back(N->getOperand(3));
4223 Ops.push_back(Chain);
4224 break;
4225 case NVPTXISD::Suld2DI64Trap:
4226 Opc = NVPTX::SULD_2D_I64_TRAP;
4227 Ops.push_back(TexHandle);
4228 Ops.push_back(N->getOperand(2));
4229 Ops.push_back(N->getOperand(3));
4230 Ops.push_back(Chain);
4231 break;
4232 case NVPTXISD::Suld2DV2I8Trap:
4233 Opc = NVPTX::SULD_2D_V2I8_TRAP;
4234 Ops.push_back(TexHandle);
4235 Ops.push_back(N->getOperand(2));
4236 Ops.push_back(N->getOperand(3));
4237 Ops.push_back(Chain);
4238 break;
4239 case NVPTXISD::Suld2DV2I16Trap:
4240 Opc = NVPTX::SULD_2D_V2I16_TRAP;
4241 Ops.push_back(TexHandle);
4242 Ops.push_back(N->getOperand(2));
4243 Ops.push_back(N->getOperand(3));
4244 Ops.push_back(Chain);
4245 break;
4246 case NVPTXISD::Suld2DV2I32Trap:
4247 Opc = NVPTX::SULD_2D_V2I32_TRAP;
4248 Ops.push_back(TexHandle);
4249 Ops.push_back(N->getOperand(2));
4250 Ops.push_back(N->getOperand(3));
4251 Ops.push_back(Chain);
4252 break;
4253 case NVPTXISD::Suld2DV2I64Trap:
4254 Opc = NVPTX::SULD_2D_V2I64_TRAP;
4255 Ops.push_back(TexHandle);
4256 Ops.push_back(N->getOperand(2));
4257 Ops.push_back(N->getOperand(3));
4258 Ops.push_back(Chain);
4259 break;
4260 case NVPTXISD::Suld2DV4I8Trap:
4261 Opc = NVPTX::SULD_2D_V4I8_TRAP;
4262 Ops.push_back(TexHandle);
4263 Ops.push_back(N->getOperand(2));
4264 Ops.push_back(N->getOperand(3));
4265 Ops.push_back(Chain);
4266 break;
4267 case NVPTXISD::Suld2DV4I16Trap:
4268 Opc = NVPTX::SULD_2D_V4I16_TRAP;
4269 Ops.push_back(TexHandle);
4270 Ops.push_back(N->getOperand(2));
4271 Ops.push_back(N->getOperand(3));
4272 Ops.push_back(Chain);
4273 break;
4274 case NVPTXISD::Suld2DV4I32Trap:
4275 Opc = NVPTX::SULD_2D_V4I32_TRAP;
4276 Ops.push_back(TexHandle);
4277 Ops.push_back(N->getOperand(2));
4278 Ops.push_back(N->getOperand(3));
4279 Ops.push_back(Chain);
4280 break;
4281 case NVPTXISD::Suld2DArrayI8Trap:
4282 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
4283 Ops.push_back(TexHandle);
4284 Ops.push_back(N->getOperand(2));
4285 Ops.push_back(N->getOperand(3));
4286 Ops.push_back(N->getOperand(4));
4287 Ops.push_back(Chain);
4288 break;
4289 case NVPTXISD::Suld2DArrayI16Trap:
4290 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
4291 Ops.push_back(TexHandle);
4292 Ops.push_back(N->getOperand(2));
4293 Ops.push_back(N->getOperand(3));
4294 Ops.push_back(N->getOperand(4));
4295 Ops.push_back(Chain);
4296 break;
4297 case NVPTXISD::Suld2DArrayI32Trap:
4298 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
4299 Ops.push_back(TexHandle);
4300 Ops.push_back(N->getOperand(2));
4301 Ops.push_back(N->getOperand(3));
4302 Ops.push_back(N->getOperand(4));
4303 Ops.push_back(Chain);
4304 break;
4305 case NVPTXISD::Suld2DArrayI64Trap:
4306 Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
4307 Ops.push_back(TexHandle);
4308 Ops.push_back(N->getOperand(2));
4309 Ops.push_back(N->getOperand(3));
4310 Ops.push_back(N->getOperand(4));
4311 Ops.push_back(Chain);
4312 break;
4313 case NVPTXISD::Suld2DArrayV2I8Trap:
4314 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
4315 Ops.push_back(TexHandle);
4316 Ops.push_back(N->getOperand(2));
4317 Ops.push_back(N->getOperand(3));
4318 Ops.push_back(N->getOperand(4));
4319 Ops.push_back(Chain);
4320 break;
4321 case NVPTXISD::Suld2DArrayV2I16Trap:
4322 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
4323 Ops.push_back(TexHandle);
4324 Ops.push_back(N->getOperand(2));
4325 Ops.push_back(N->getOperand(3));
4326 Ops.push_back(N->getOperand(4));
4327 Ops.push_back(Chain);
4328 break;
4329 case NVPTXISD::Suld2DArrayV2I32Trap:
4330 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
4331 Ops.push_back(TexHandle);
4332 Ops.push_back(N->getOperand(2));
4333 Ops.push_back(N->getOperand(3));
4334 Ops.push_back(N->getOperand(4));
4335 Ops.push_back(Chain);
4336 break;
4337 case NVPTXISD::Suld2DArrayV2I64Trap:
4338 Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
4339 Ops.push_back(TexHandle);
4340 Ops.push_back(N->getOperand(2));
4341 Ops.push_back(N->getOperand(3));
4342 Ops.push_back(N->getOperand(4));
4343 Ops.push_back(Chain);
4344 break;
4345 case NVPTXISD::Suld2DArrayV4I8Trap:
4346 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
4347 Ops.push_back(TexHandle);
4348 Ops.push_back(N->getOperand(2));
4349 Ops.push_back(N->getOperand(3));
4350 Ops.push_back(N->getOperand(4));
4351 Ops.push_back(Chain);
4352 break;
4353 case NVPTXISD::Suld2DArrayV4I16Trap:
4354 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
4355 Ops.push_back(TexHandle);
4356 Ops.push_back(N->getOperand(2));
4357 Ops.push_back(N->getOperand(3));
4358 Ops.push_back(N->getOperand(4));
4359 Ops.push_back(Chain);
4360 break;
4361 case NVPTXISD::Suld2DArrayV4I32Trap:
4362 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
4363 Ops.push_back(TexHandle);
4364 Ops.push_back(N->getOperand(2));
4365 Ops.push_back(N->getOperand(3));
4366 Ops.push_back(N->getOperand(4));
4367 Ops.push_back(Chain);
4368 break;
4369 case NVPTXISD::Suld3DI8Trap:
4370 Opc = NVPTX::SULD_3D_I8_TRAP;
4371 Ops.push_back(TexHandle);
4372 Ops.push_back(N->getOperand(2));
4373 Ops.push_back(N->getOperand(3));
4374 Ops.push_back(N->getOperand(4));
4375 Ops.push_back(Chain);
4376 break;
4377 case NVPTXISD::Suld3DI16Trap:
4378 Opc = NVPTX::SULD_3D_I16_TRAP;
4379 Ops.push_back(TexHandle);
4380 Ops.push_back(N->getOperand(2));
4381 Ops.push_back(N->getOperand(3));
4382 Ops.push_back(N->getOperand(4));
4383 Ops.push_back(Chain);
4384 break;
4385 case NVPTXISD::Suld3DI32Trap:
4386 Opc = NVPTX::SULD_3D_I32_TRAP;
4387 Ops.push_back(TexHandle);
4388 Ops.push_back(N->getOperand(2));
4389 Ops.push_back(N->getOperand(3));
4390 Ops.push_back(N->getOperand(4));
4391 Ops.push_back(Chain);
4392 break;
4393 case NVPTXISD::Suld3DI64Trap:
4394 Opc = NVPTX::SULD_3D_I64_TRAP;
4395 Ops.push_back(TexHandle);
4396 Ops.push_back(N->getOperand(2));
4397 Ops.push_back(N->getOperand(3));
4398 Ops.push_back(N->getOperand(4));
4399 Ops.push_back(Chain);
4400 break;
4401 case NVPTXISD::Suld3DV2I8Trap:
4402 Opc = NVPTX::SULD_3D_V2I8_TRAP;
4403 Ops.push_back(TexHandle);
4404 Ops.push_back(N->getOperand(2));
4405 Ops.push_back(N->getOperand(3));
4406 Ops.push_back(N->getOperand(4));
4407 Ops.push_back(Chain);
4408 break;
4409 case NVPTXISD::Suld3DV2I16Trap:
4410 Opc = NVPTX::SULD_3D_V2I16_TRAP;
4411 Ops.push_back(TexHandle);
4412 Ops.push_back(N->getOperand(2));
4413 Ops.push_back(N->getOperand(3));
4414 Ops.push_back(N->getOperand(4));
4415 Ops.push_back(Chain);
4416 break;
4417 case NVPTXISD::Suld3DV2I32Trap:
4418 Opc = NVPTX::SULD_3D_V2I32_TRAP;
4419 Ops.push_back(TexHandle);
4420 Ops.push_back(N->getOperand(2));
4421 Ops.push_back(N->getOperand(3));
4422 Ops.push_back(N->getOperand(4));
4423 Ops.push_back(Chain);
4424 break;
4425 case NVPTXISD::Suld3DV2I64Trap:
4426 Opc = NVPTX::SULD_3D_V2I64_TRAP;
4427 Ops.push_back(TexHandle);
4428 Ops.push_back(N->getOperand(2));
4429 Ops.push_back(N->getOperand(3));
4430 Ops.push_back(N->getOperand(4));
4431 Ops.push_back(Chain);
4432 break;
4433 case NVPTXISD::Suld3DV4I8Trap:
4434 Opc = NVPTX::SULD_3D_V4I8_TRAP;
4435 Ops.push_back(TexHandle);
4436 Ops.push_back(N->getOperand(2));
4437 Ops.push_back(N->getOperand(3));
4438 Ops.push_back(N->getOperand(4));
4439 Ops.push_back(Chain);
4440 break;
4441 case NVPTXISD::Suld3DV4I16Trap:
4442 Opc = NVPTX::SULD_3D_V4I16_TRAP;
4443 Ops.push_back(TexHandle);
4444 Ops.push_back(N->getOperand(2));
4445 Ops.push_back(N->getOperand(3));
4446 Ops.push_back(N->getOperand(4));
4447 Ops.push_back(Chain);
4448 break;
4449 case NVPTXISD::Suld3DV4I32Trap:
4450 Opc = NVPTX::SULD_3D_V4I32_TRAP;
4451 Ops.push_back(TexHandle);
4452 Ops.push_back(N->getOperand(2));
4453 Ops.push_back(N->getOperand(3));
4454 Ops.push_back(N->getOperand(4));
4455 Ops.push_back(Chain);
4456 break;
4457 case NVPTXISD::Suld1DI8Zero:
4458 Opc = NVPTX::SULD_1D_I8_ZERO;
4459 Ops.push_back(TexHandle);
4460 Ops.push_back(N->getOperand(2));
4461 Ops.push_back(Chain);
4462 break;
4463 case NVPTXISD::Suld1DI16Zero:
4464 Opc = NVPTX::SULD_1D_I16_ZERO;
4465 Ops.push_back(TexHandle);
4466 Ops.push_back(N->getOperand(2));
4467 Ops.push_back(Chain);
4468 break;
4469 case NVPTXISD::Suld1DI32Zero:
4470 Opc = NVPTX::SULD_1D_I32_ZERO;
4471 Ops.push_back(TexHandle);
4472 Ops.push_back(N->getOperand(2));
4473 Ops.push_back(Chain);
4474 break;
4475 case NVPTXISD::Suld1DI64Zero:
4476 Opc = NVPTX::SULD_1D_I64_ZERO;
4477 Ops.push_back(TexHandle);
4478 Ops.push_back(N->getOperand(2));
4479 Ops.push_back(Chain);
4480 break;
4481 case NVPTXISD::Suld1DV2I8Zero:
4482 Opc = NVPTX::SULD_1D_V2I8_ZERO;
4483 Ops.push_back(TexHandle);
4484 Ops.push_back(N->getOperand(2));
4485 Ops.push_back(Chain);
4486 break;
4487 case NVPTXISD::Suld1DV2I16Zero:
4488 Opc = NVPTX::SULD_1D_V2I16_ZERO;
4489 Ops.push_back(TexHandle);
4490 Ops.push_back(N->getOperand(2));
4491 Ops.push_back(Chain);
4492 break;
4493 case NVPTXISD::Suld1DV2I32Zero:
4494 Opc = NVPTX::SULD_1D_V2I32_ZERO;
4495 Ops.push_back(TexHandle);
4496 Ops.push_back(N->getOperand(2));
4497 Ops.push_back(Chain);
4498 break;
4499 case NVPTXISD::Suld1DV2I64Zero:
4500 Opc = NVPTX::SULD_1D_V2I64_ZERO;
4501 Ops.push_back(TexHandle);
4502 Ops.push_back(N->getOperand(2));
4503 Ops.push_back(Chain);
4504 break;
4505 case NVPTXISD::Suld1DV4I8Zero:
4506 Opc = NVPTX::SULD_1D_V4I8_ZERO;
4507 Ops.push_back(TexHandle);
4508 Ops.push_back(N->getOperand(2));
4509 Ops.push_back(Chain);
4510 break;
4511 case NVPTXISD::Suld1DV4I16Zero:
4512 Opc = NVPTX::SULD_1D_V4I16_ZERO;
4513 Ops.push_back(TexHandle);
4514 Ops.push_back(N->getOperand(2));
4515 Ops.push_back(Chain);
4516 break;
4517 case NVPTXISD::Suld1DV4I32Zero:
4518 Opc = NVPTX::SULD_1D_V4I32_ZERO;
4519 Ops.push_back(TexHandle);
4520 Ops.push_back(N->getOperand(2));
4521 Ops.push_back(Chain);
4522 break;
4523 case NVPTXISD::Suld1DArrayI8Zero:
4524 Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
4525 Ops.push_back(TexHandle);
4526 Ops.push_back(N->getOperand(2));
4527 Ops.push_back(N->getOperand(3));
4528 Ops.push_back(Chain);
4529 break;
4530 case NVPTXISD::Suld1DArrayI16Zero:
4531 Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
4532 Ops.push_back(TexHandle);
4533 Ops.push_back(N->getOperand(2));
4534 Ops.push_back(N->getOperand(3));
4535 Ops.push_back(Chain);
4536 break;
4537 case NVPTXISD::Suld1DArrayI32Zero:
4538 Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
4539 Ops.push_back(TexHandle);
4540 Ops.push_back(N->getOperand(2));
4541 Ops.push_back(N->getOperand(3));
4542 Ops.push_back(Chain);
4543 break;
4544 case NVPTXISD::Suld1DArrayI64Zero:
4545 Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
4546 Ops.push_back(TexHandle);
4547 Ops.push_back(N->getOperand(2));
4548 Ops.push_back(N->getOperand(3));
4549 Ops.push_back(Chain);
4550 break;
4551 case NVPTXISD::Suld1DArrayV2I8Zero:
4552 Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
4553 Ops.push_back(TexHandle);
4554 Ops.push_back(N->getOperand(2));
4555 Ops.push_back(N->getOperand(3));
4556 Ops.push_back(Chain);
4557 break;
4558 case NVPTXISD::Suld1DArrayV2I16Zero:
4559 Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
4560 Ops.push_back(TexHandle);
4561 Ops.push_back(N->getOperand(2));
4562 Ops.push_back(N->getOperand(3));
4563 Ops.push_back(Chain);
4564 break;
4565 case NVPTXISD::Suld1DArrayV2I32Zero:
4566 Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
4567 Ops.push_back(TexHandle);
4568 Ops.push_back(N->getOperand(2));
4569 Ops.push_back(N->getOperand(3));
4570 Ops.push_back(Chain);
4571 break;
4572 case NVPTXISD::Suld1DArrayV2I64Zero:
4573 Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
4574 Ops.push_back(TexHandle);
4575 Ops.push_back(N->getOperand(2));
4576 Ops.push_back(N->getOperand(3));
4577 Ops.push_back(Chain);
4578 break;
4579 case NVPTXISD::Suld1DArrayV4I8Zero:
4580 Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
4581 Ops.push_back(TexHandle);
4582 Ops.push_back(N->getOperand(2));
4583 Ops.push_back(N->getOperand(3));
4584 Ops.push_back(Chain);
4585 break;
4586 case NVPTXISD::Suld1DArrayV4I16Zero:
4587 Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
4588 Ops.push_back(TexHandle);
4589 Ops.push_back(N->getOperand(2));
4590 Ops.push_back(N->getOperand(3));
4591 Ops.push_back(Chain);
4592 break;
4593 case NVPTXISD::Suld1DArrayV4I32Zero:
4594 Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
4595 Ops.push_back(TexHandle);
4596 Ops.push_back(N->getOperand(2));
4597 Ops.push_back(N->getOperand(3));
4598 Ops.push_back(Chain);
4599 break;
4600 case NVPTXISD::Suld2DI8Zero:
4601 Opc = NVPTX::SULD_2D_I8_ZERO;
4602 Ops.push_back(TexHandle);
4603 Ops.push_back(N->getOperand(2));
4604 Ops.push_back(N->getOperand(3));
4605 Ops.push_back(Chain);
4606 break;
4607 case NVPTXISD::Suld2DI16Zero:
4608 Opc = NVPTX::SULD_2D_I16_ZERO;
4609 Ops.push_back(TexHandle);
4610 Ops.push_back(N->getOperand(2));
4611 Ops.push_back(N->getOperand(3));
4612 Ops.push_back(Chain);
4613 break;
4614 case NVPTXISD::Suld2DI32Zero:
4615 Opc = NVPTX::SULD_2D_I32_ZERO;
4616 Ops.push_back(TexHandle);
4617 Ops.push_back(N->getOperand(2));
4618 Ops.push_back(N->getOperand(3));
4619 Ops.push_back(Chain);
4620 break;
4621 case NVPTXISD::Suld2DI64Zero:
4622 Opc = NVPTX::SULD_2D_I64_ZERO;
4623 Ops.push_back(TexHandle);
4624 Ops.push_back(N->getOperand(2));
4625 Ops.push_back(N->getOperand(3));
4626 Ops.push_back(Chain);
4627 break;
4628 case NVPTXISD::Suld2DV2I8Zero:
4629 Opc = NVPTX::SULD_2D_V2I8_ZERO;
4630 Ops.push_back(TexHandle);
4631 Ops.push_back(N->getOperand(2));
4632 Ops.push_back(N->getOperand(3));
4633 Ops.push_back(Chain);
4634 break;
4635 case NVPTXISD::Suld2DV2I16Zero:
4636 Opc = NVPTX::SULD_2D_V2I16_ZERO;
4637 Ops.push_back(TexHandle);
4638 Ops.push_back(N->getOperand(2));
4639 Ops.push_back(N->getOperand(3));
4640 Ops.push_back(Chain);
4641 break;
4642 case NVPTXISD::Suld2DV2I32Zero:
4643 Opc = NVPTX::SULD_2D_V2I32_ZERO;
4644 Ops.push_back(TexHandle);
4645 Ops.push_back(N->getOperand(2));
4646 Ops.push_back(N->getOperand(3));
4647 Ops.push_back(Chain);
4648 break;
4649 case NVPTXISD::Suld2DV2I64Zero:
4650 Opc = NVPTX::SULD_2D_V2I64_ZERO;
4651 Ops.push_back(TexHandle);
4652 Ops.push_back(N->getOperand(2));
4653 Ops.push_back(N->getOperand(3));
4654 Ops.push_back(Chain);
4655 break;
4656 case NVPTXISD::Suld2DV4I8Zero:
4657 Opc = NVPTX::SULD_2D_V4I8_ZERO;
4658 Ops.push_back(TexHandle);
4659 Ops.push_back(N->getOperand(2));
4660 Ops.push_back(N->getOperand(3));
4661 Ops.push_back(Chain);
4662 break;
4663 case NVPTXISD::Suld2DV4I16Zero:
4664 Opc = NVPTX::SULD_2D_V4I16_ZERO;
4665 Ops.push_back(TexHandle);
4666 Ops.push_back(N->getOperand(2));
4667 Ops.push_back(N->getOperand(3));
4668 Ops.push_back(Chain);
4669 break;
4670 case NVPTXISD::Suld2DV4I32Zero:
4671 Opc = NVPTX::SULD_2D_V4I32_ZERO;
4672 Ops.push_back(TexHandle);
4673 Ops.push_back(N->getOperand(2));
4674 Ops.push_back(N->getOperand(3));
4675 Ops.push_back(Chain);
4676 break;
4677 case NVPTXISD::Suld2DArrayI8Zero:
4678 Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
4679 Ops.push_back(TexHandle);
4680 Ops.push_back(N->getOperand(2));
4681 Ops.push_back(N->getOperand(3));
4682 Ops.push_back(N->getOperand(4));
4683 Ops.push_back(Chain);
4684 break;
4685 case NVPTXISD::Suld2DArrayI16Zero:
4686 Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
4687 Ops.push_back(TexHandle);
4688 Ops.push_back(N->getOperand(2));
4689 Ops.push_back(N->getOperand(3));
4690 Ops.push_back(N->getOperand(4));
4691 Ops.push_back(Chain);
4692 break;
4693 case NVPTXISD::Suld2DArrayI32Zero:
4694 Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
4695 Ops.push_back(TexHandle);
4696 Ops.push_back(N->getOperand(2));
4697 Ops.push_back(N->getOperand(3));
4698 Ops.push_back(N->getOperand(4));
4699 Ops.push_back(Chain);
4700 break;
4701 case NVPTXISD::Suld2DArrayI64Zero:
4702 Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
4703 Ops.push_back(TexHandle);
4704 Ops.push_back(N->getOperand(2));
4705 Ops.push_back(N->getOperand(3));
4706 Ops.push_back(N->getOperand(4));
4707 Ops.push_back(Chain);
4708 break;
4709 case NVPTXISD::Suld2DArrayV2I8Zero:
4710 Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
4711 Ops.push_back(TexHandle);
4712 Ops.push_back(N->getOperand(2));
4713 Ops.push_back(N->getOperand(3));
4714 Ops.push_back(N->getOperand(4));
4715 Ops.push_back(Chain);
4716 break;
4717 case NVPTXISD::Suld2DArrayV2I16Zero:
4718 Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
4719 Ops.push_back(TexHandle);
4720 Ops.push_back(N->getOperand(2));
4721 Ops.push_back(N->getOperand(3));
4722 Ops.push_back(N->getOperand(4));
4723 Ops.push_back(Chain);
4724 break;
4725 case NVPTXISD::Suld2DArrayV2I32Zero:
4726 Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
4727 Ops.push_back(TexHandle);
4728 Ops.push_back(N->getOperand(2));
4729 Ops.push_back(N->getOperand(3));
4730 Ops.push_back(N->getOperand(4));
4731 Ops.push_back(Chain);
4732 break;
4733 case NVPTXISD::Suld2DArrayV2I64Zero:
4734 Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
4735 Ops.push_back(TexHandle);
4736 Ops.push_back(N->getOperand(2));
4737 Ops.push_back(N->getOperand(3));
4738 Ops.push_back(N->getOperand(4));
4739 Ops.push_back(Chain);
4740 break;
4741 case NVPTXISD::Suld2DArrayV4I8Zero:
4742 Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
4743 Ops.push_back(TexHandle);
4744 Ops.push_back(N->getOperand(2));
4745 Ops.push_back(N->getOperand(3));
4746 Ops.push_back(N->getOperand(4));
4747 Ops.push_back(Chain);
4748 break;
4749 case NVPTXISD::Suld2DArrayV4I16Zero:
4750 Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
4751 Ops.push_back(TexHandle);
4752 Ops.push_back(N->getOperand(2));
4753 Ops.push_back(N->getOperand(3));
4754 Ops.push_back(N->getOperand(4));
4755 Ops.push_back(Chain);
4756 break;
4757 case NVPTXISD::Suld2DArrayV4I32Zero:
4758 Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
4759 Ops.push_back(TexHandle);
4760 Ops.push_back(N->getOperand(2));
4761 Ops.push_back(N->getOperand(3));
4762 Ops.push_back(N->getOperand(4));
4763 Ops.push_back(Chain);
4764 break;
4765 case NVPTXISD::Suld3DI8Zero:
4766 Opc = NVPTX::SULD_3D_I8_ZERO;
4767 Ops.push_back(TexHandle);
4768 Ops.push_back(N->getOperand(2));
4769 Ops.push_back(N->getOperand(3));
4770 Ops.push_back(N->getOperand(4));
4771 Ops.push_back(Chain);
4772 break;
4773 case NVPTXISD::Suld3DI16Zero:
4774 Opc = NVPTX::SULD_3D_I16_ZERO;
4775 Ops.push_back(TexHandle);
4776 Ops.push_back(N->getOperand(2));
4777 Ops.push_back(N->getOperand(3));
4778 Ops.push_back(N->getOperand(4));
4779 Ops.push_back(Chain);
4780 break;
4781 case NVPTXISD::Suld3DI32Zero:
4782 Opc = NVPTX::SULD_3D_I32_ZERO;
4783 Ops.push_back(TexHandle);
4784 Ops.push_back(N->getOperand(2));
4785 Ops.push_back(N->getOperand(3));
4786 Ops.push_back(N->getOperand(4));
4787 Ops.push_back(Chain);
4788 break;
4789 case NVPTXISD::Suld3DI64Zero:
4790 Opc = NVPTX::SULD_3D_I64_ZERO;
4791 Ops.push_back(TexHandle);
4792 Ops.push_back(N->getOperand(2));
4793 Ops.push_back(N->getOperand(3));
4794 Ops.push_back(N->getOperand(4));
4795 Ops.push_back(Chain);
4796 break;
4797 case NVPTXISD::Suld3DV2I8Zero:
4798 Opc = NVPTX::SULD_3D_V2I8_ZERO;
4799 Ops.push_back(TexHandle);
4800 Ops.push_back(N->getOperand(2));
4801 Ops.push_back(N->getOperand(3));
4802 Ops.push_back(N->getOperand(4));
4803 Ops.push_back(Chain);
4804 break;
4805 case NVPTXISD::Suld3DV2I16Zero:
4806 Opc = NVPTX::SULD_3D_V2I16_ZERO;
4807 Ops.push_back(TexHandle);
4808 Ops.push_back(N->getOperand(2));
4809 Ops.push_back(N->getOperand(3));
4810 Ops.push_back(N->getOperand(4));
4811 Ops.push_back(Chain);
4812 break;
4813 case NVPTXISD::Suld3DV2I32Zero:
4814 Opc = NVPTX::SULD_3D_V2I32_ZERO;
4815 Ops.push_back(TexHandle);
4816 Ops.push_back(N->getOperand(2));
4817 Ops.push_back(N->getOperand(3));
4818 Ops.push_back(N->getOperand(4));
4819 Ops.push_back(Chain);
4820 break;
4821 case NVPTXISD::Suld3DV2I64Zero:
4822 Opc = NVPTX::SULD_3D_V2I64_ZERO;
4823 Ops.push_back(TexHandle);
4824 Ops.push_back(N->getOperand(2));
4825 Ops.push_back(N->getOperand(3));
4826 Ops.push_back(N->getOperand(4));
4827 Ops.push_back(Chain);
4828 break;
4829 case NVPTXISD::Suld3DV4I8Zero:
4830 Opc = NVPTX::SULD_3D_V4I8_ZERO;
4831 Ops.push_back(TexHandle);
4832 Ops.push_back(N->getOperand(2));
4833 Ops.push_back(N->getOperand(3));
4834 Ops.push_back(N->getOperand(4));
4835 Ops.push_back(Chain);
4836 break;
4837 case NVPTXISD::Suld3DV4I16Zero:
4838 Opc = NVPTX::SULD_3D_V4I16_ZERO;
4839 Ops.push_back(TexHandle);
4840 Ops.push_back(N->getOperand(2));
4841 Ops.push_back(N->getOperand(3));
4842 Ops.push_back(N->getOperand(4));
4843 Ops.push_back(Chain);
4844 break;
4845 case NVPTXISD::Suld3DV4I32Zero:
4846 Opc = NVPTX::SULD_3D_V4I32_ZERO;
4847 Ops.push_back(TexHandle);
4848 Ops.push_back(N->getOperand(2));
4849 Ops.push_back(N->getOperand(3));
4850 Ops.push_back(N->getOperand(4));
4851 Ops.push_back(Chain);
4852 break;
4853 }
4854 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
4855 return true;
4856 }
4857
4858
4859 /// SelectBFE - Look for instruction sequences that can be made more efficient
4860 /// by using the 'bfe' (bit-field extract) PTX instruction
tryBFE(SDNode * N)4861 bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
4862 SDLoc DL(N);
4863 SDValue LHS = N->getOperand(0);
4864 SDValue RHS = N->getOperand(1);
4865 SDValue Len;
4866 SDValue Start;
4867 SDValue Val;
4868 bool IsSigned = false;
4869
4870 if (N->getOpcode() == ISD::AND) {
4871 // Canonicalize the operands
4872 // We want 'and %val, %mask'
4873 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
4874 std::swap(LHS, RHS);
4875 }
4876
4877 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
4878 if (!Mask) {
4879 // We need a constant mask on the RHS of the AND
4880 return false;
4881 }
4882
4883 // Extract the mask bits
4884 uint64_t MaskVal = Mask->getZExtValue();
4885 if (!isMask_64(MaskVal)) {
4886 // We *could* handle shifted masks here, but doing so would require an
4887 // 'and' operation to fix up the low-order bits so we would trade
4888 // shr+and for bfe+and, which has the same throughput
4889 return false;
4890 }
4891
4892 // How many bits are in our mask?
4893 uint64_t NumBits = countTrailingOnes(MaskVal);
4894 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
4895
4896 if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
4897 // We have a 'srl/and' pair, extract the effective start bit and length
4898 Val = LHS.getNode()->getOperand(0);
4899 Start = LHS.getNode()->getOperand(1);
4900 ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
4901 if (StartConst) {
4902 uint64_t StartVal = StartConst->getZExtValue();
4903 // How many "good" bits do we have left? "good" is defined here as bits
4904 // that exist in the original value, not shifted in.
4905 uint64_t GoodBits = Start.getValueType().getSizeInBits() - StartVal;
4906 if (NumBits > GoodBits) {
4907 // Do not handle the case where bits have been shifted in. In theory
4908 // we could handle this, but the cost is likely higher than just
4909 // emitting the srl/and pair.
4910 return false;
4911 }
4912 Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
4913 } else {
4914 // Do not handle the case where the shift amount (can be zero if no srl
4915 // was found) is not constant. We could handle this case, but it would
4916 // require run-time logic that would be more expensive than just
4917 // emitting the srl/and pair.
4918 return false;
4919 }
4920 } else {
4921 // Do not handle the case where the LHS of the and is not a shift. While
4922 // it would be trivial to handle this case, it would just transform
4923 // 'and' -> 'bfe', but 'and' has higher-throughput.
4924 return false;
4925 }
4926 } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
4927 if (LHS->getOpcode() == ISD::AND) {
4928 ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
4929 if (!ShiftCnst) {
4930 // Shift amount must be constant
4931 return false;
4932 }
4933
4934 uint64_t ShiftAmt = ShiftCnst->getZExtValue();
4935
4936 SDValue AndLHS = LHS->getOperand(0);
4937 SDValue AndRHS = LHS->getOperand(1);
4938
4939 // Canonicalize the AND to have the mask on the RHS
4940 if (isa<ConstantSDNode>(AndLHS)) {
4941 std::swap(AndLHS, AndRHS);
4942 }
4943
4944 ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
4945 if (!MaskCnst) {
4946 // Mask must be constant
4947 return false;
4948 }
4949
4950 uint64_t MaskVal = MaskCnst->getZExtValue();
4951 uint64_t NumZeros;
4952 uint64_t NumBits;
4953 if (isMask_64(MaskVal)) {
4954 NumZeros = 0;
4955 // The number of bits in the result bitfield will be the number of
4956 // trailing ones (the AND) minus the number of bits we shift off
4957 NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
4958 } else if (isShiftedMask_64(MaskVal)) {
4959 NumZeros = countTrailingZeros(MaskVal);
4960 unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
4961 // The number of bits in the result bitfield will be the number of
4962 // trailing zeros plus the number of set bits in the mask minus the
4963 // number of bits we shift off
4964 NumBits = NumZeros + NumOnes - ShiftAmt;
4965 } else {
4966 // This is not a mask we can handle
4967 return false;
4968 }
4969
4970 if (ShiftAmt < NumZeros) {
4971 // Handling this case would require extra logic that would make this
4972 // transformation non-profitable
4973 return false;
4974 }
4975
4976 Val = AndLHS;
4977 Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
4978 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
4979 } else if (LHS->getOpcode() == ISD::SHL) {
4980 // Here, we have a pattern like:
4981 //
4982 // (sra (shl val, NN), MM)
4983 // or
4984 // (srl (shl val, NN), MM)
4985 //
4986 // If MM >= NN, we can efficiently optimize this with bfe
4987 Val = LHS->getOperand(0);
4988
4989 SDValue ShlRHS = LHS->getOperand(1);
4990 ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
4991 if (!ShlCnst) {
4992 // Shift amount must be constant
4993 return false;
4994 }
4995 uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
4996
4997 SDValue ShrRHS = RHS;
4998 ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
4999 if (!ShrCnst) {
5000 // Shift amount must be constant
5001 return false;
5002 }
5003 uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
5004
5005 // To avoid extra codegen and be profitable, we need Outer >= Inner
5006 if (OuterShiftAmt < InnerShiftAmt) {
5007 return false;
5008 }
5009
5010 // If the outer shift is more than the type size, we have no bitfield to
5011 // extract (since we also check that the inner shift is <= the outer shift
5012 // then this also implies that the inner shift is < the type size)
5013 if (OuterShiftAmt >= Val.getValueType().getSizeInBits()) {
5014 return false;
5015 }
5016
5017 Start =
5018 CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL, MVT::i32);
5019 Len =
5020 CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() -
5021 OuterShiftAmt, DL, MVT::i32);
5022
5023 if (N->getOpcode() == ISD::SRA) {
5024 // If we have a arithmetic right shift, we need to use the signed bfe
5025 // variant
5026 IsSigned = true;
5027 }
5028 } else {
5029 // No can do...
5030 return false;
5031 }
5032 } else {
5033 // No can do...
5034 return false;
5035 }
5036
5037
5038 unsigned Opc;
5039 // For the BFE operations we form here from "and" and "srl", always use the
5040 // unsigned variants.
5041 if (Val.getValueType() == MVT::i32) {
5042 if (IsSigned) {
5043 Opc = NVPTX::BFE_S32rii;
5044 } else {
5045 Opc = NVPTX::BFE_U32rii;
5046 }
5047 } else if (Val.getValueType() == MVT::i64) {
5048 if (IsSigned) {
5049 Opc = NVPTX::BFE_S64rii;
5050 } else {
5051 Opc = NVPTX::BFE_U64rii;
5052 }
5053 } else {
5054 // We cannot handle this type
5055 return false;
5056 }
5057
5058 SDValue Ops[] = {
5059 Val, Start, Len
5060 };
5061
5062 ReplaceNode(N, CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops));
5063 return true;
5064 }
5065
5066 // SelectDirectAddr - Match a direct address for DAG.
5067 // A direct address could be a globaladdress or externalsymbol.
SelectDirectAddr(SDValue N,SDValue & Address)5068 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
5069 // Return true if TGA or ES.
5070 if (N.getOpcode() == ISD::TargetGlobalAddress ||
5071 N.getOpcode() == ISD::TargetExternalSymbol) {
5072 Address = N;
5073 return true;
5074 }
5075 if (N.getOpcode() == NVPTXISD::Wrapper) {
5076 Address = N.getOperand(0);
5077 return true;
5078 }
5079 if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
5080 unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
5081 if (IID == Intrinsic::nvvm_ptr_gen_to_param)
5082 if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
5083 return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
5084 }
5085 return false;
5086 }
5087
5088 // symbol+offset
SelectADDRsi_imp(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset,MVT mvt)5089 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
5090 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
5091 if (Addr.getOpcode() == ISD::ADD) {
5092 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
5093 SDValue base = Addr.getOperand(0);
5094 if (SelectDirectAddr(base, Base)) {
5095 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
5096 mvt);
5097 return true;
5098 }
5099 }
5100 }
5101 return false;
5102 }
5103
5104 // symbol+offset
SelectADDRsi(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset)5105 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
5106 SDValue &Base, SDValue &Offset) {
5107 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
5108 }
5109
5110 // symbol+offset
SelectADDRsi64(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset)5111 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
5112 SDValue &Base, SDValue &Offset) {
5113 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
5114 }
5115
5116 // register+offset
SelectADDRri_imp(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset,MVT mvt)5117 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
5118 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
5119 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
5120 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
5121 Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
5122 return true;
5123 }
5124 if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
5125 Addr.getOpcode() == ISD::TargetGlobalAddress)
5126 return false; // direct calls.
5127
5128 if (Addr.getOpcode() == ISD::ADD) {
5129 if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
5130 return false;
5131 }
5132 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
5133 if (FrameIndexSDNode *FIN =
5134 dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
5135 // Constant offset from frame ref.
5136 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
5137 else
5138 Base = Addr.getOperand(0);
5139 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
5140 mvt);
5141 return true;
5142 }
5143 }
5144 return false;
5145 }
5146
5147 // register+offset
SelectADDRri(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset)5148 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
5149 SDValue &Base, SDValue &Offset) {
5150 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
5151 }
5152
5153 // register+offset
SelectADDRri64(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset)5154 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
5155 SDValue &Base, SDValue &Offset) {
5156 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
5157 }
5158
ChkMemSDNodeAddressSpace(SDNode * N,unsigned int spN) const5159 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
5160 unsigned int spN) const {
5161 const Value *Src = nullptr;
5162 if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
5163 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
5164 return true;
5165 Src = mN->getMemOperand()->getValue();
5166 }
5167 if (!Src)
5168 return false;
5169 if (auto *PT = dyn_cast<PointerType>(Src->getType()))
5170 return (PT->getAddressSpace() == spN);
5171 return false;
5172 }
5173
5174 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
5175 /// inline asm expressions.
SelectInlineAsmMemoryOperand(const SDValue & Op,unsigned ConstraintID,std::vector<SDValue> & OutOps)5176 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
5177 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
5178 SDValue Op0, Op1;
5179 switch (ConstraintID) {
5180 default:
5181 return true;
5182 case InlineAsm::Constraint_m: // memory
5183 if (SelectDirectAddr(Op, Op0)) {
5184 OutOps.push_back(Op0);
5185 OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
5186 return false;
5187 }
5188 if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
5189 OutOps.push_back(Op0);
5190 OutOps.push_back(Op1);
5191 return false;
5192 }
5193 break;
5194 }
5195 return true;
5196 }
5197
5198 /// GetConvertOpcode - Returns the CVT_ instruction opcode that implements a
5199 /// conversion from \p SrcTy to \p DestTy.
GetConvertOpcode(MVT DestTy,MVT SrcTy,bool IsSigned)5200 unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
5201 bool IsSigned) {
5202 switch (SrcTy.SimpleTy) {
5203 default:
5204 llvm_unreachable("Unhandled source type");
5205 case MVT::i8:
5206 switch (DestTy.SimpleTy) {
5207 default:
5208 llvm_unreachable("Unhandled dest type");
5209 case MVT::i16:
5210 return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
5211 case MVT::i32:
5212 return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
5213 case MVT::i64:
5214 return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
5215 }
5216 case MVT::i16:
5217 switch (DestTy.SimpleTy) {
5218 default:
5219 llvm_unreachable("Unhandled dest type");
5220 case MVT::i8:
5221 return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
5222 case MVT::i32:
5223 return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
5224 case MVT::i64:
5225 return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
5226 }
5227 case MVT::i32:
5228 switch (DestTy.SimpleTy) {
5229 default:
5230 llvm_unreachable("Unhandled dest type");
5231 case MVT::i8:
5232 return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
5233 case MVT::i16:
5234 return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
5235 case MVT::i64:
5236 return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
5237 }
5238 case MVT::i64:
5239 switch (DestTy.SimpleTy) {
5240 default:
5241 llvm_unreachable("Unhandled dest type");
5242 case MVT::i8:
5243 return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
5244 case MVT::i16:
5245 return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
5246 case MVT::i32:
5247 return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
5248 }
5249 }
5250 }
5251