1 //===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines an instruction selector for the NVPTX target.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "NVPTXISelDAGToDAG.h"
15 #include "llvm/IR/GlobalValue.h"
16 #include "llvm/IR/Instructions.h"
17 #include "llvm/Support/CommandLine.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/ErrorHandling.h"
20 #include "llvm/Support/raw_ostream.h"
21 #include "llvm/Target/TargetIntrinsicInfo.h"
22
23 using namespace llvm;
24
25 #define DEBUG_TYPE "nvptx-isel"
26
27 unsigned FMAContractLevel = 0;
28
29 static cl::opt<unsigned, true>
30 FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
31 cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
32 " 1: do it 2: do it aggressively"),
33 cl::location(FMAContractLevel),
34 cl::init(2));
35
36 static cl::opt<int> UsePrecDivF32(
37 "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
38 cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
39 " IEEE Compliant F32 div.rnd if avaiable."),
40 cl::init(2));
41
42 static cl::opt<bool>
43 UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
44 cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
45 cl::init(true));
46
47 static cl::opt<bool>
48 FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
49 cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
50 cl::init(false));
51
52
53 /// createNVPTXISelDag - This pass converts a legalized DAG into a
54 /// NVPTX-specific DAG, ready for instruction scheduling.
createNVPTXISelDag(NVPTXTargetMachine & TM,llvm::CodeGenOpt::Level OptLevel)55 FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
56 llvm::CodeGenOpt::Level OptLevel) {
57 return new NVPTXDAGToDAGISel(TM, OptLevel);
58 }
59
NVPTXDAGToDAGISel(NVPTXTargetMachine & tm,CodeGenOpt::Level OptLevel)60 NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
61 CodeGenOpt::Level OptLevel)
62 : SelectionDAGISel(tm, OptLevel),
63 Subtarget(tm.getSubtarget<NVPTXSubtarget>()) {
64
65 doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel >= 1);
66 doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel >= 1);
67 doFMAF32AGG =
68 (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel == 2);
69 doFMAF64AGG =
70 (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel == 2);
71
72 allowFMA = (FMAContractLevel >= 1);
73
74 doMulWide = (OptLevel > 0);
75 }
76
getDivF32Level() const77 int NVPTXDAGToDAGISel::getDivF32Level() const {
78 if (UsePrecDivF32.getNumOccurrences() > 0) {
79 // If nvptx-prec-div32=N is used on the command-line, always honor it
80 return UsePrecDivF32;
81 } else {
82 // Otherwise, use div.approx if fast math is enabled
83 if (TM.Options.UnsafeFPMath)
84 return 0;
85 else
86 return 2;
87 }
88 }
89
usePrecSqrtF32() const90 bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
91 if (UsePrecSqrtF32.getNumOccurrences() > 0) {
92 // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
93 return UsePrecSqrtF32;
94 } else {
95 // Otherwise, use sqrt.approx if fast math is enabled
96 if (TM.Options.UnsafeFPMath)
97 return false;
98 else
99 return true;
100 }
101 }
102
useF32FTZ() const103 bool NVPTXDAGToDAGISel::useF32FTZ() const {
104 if (FtzEnabled.getNumOccurrences() > 0) {
105 // If nvptx-f32ftz is used on the command-line, always honor it
106 return FtzEnabled;
107 } else {
108 const Function *F = MF->getFunction();
109 // Otherwise, check for an nvptx-f32ftz attribute on the function
110 if (F->hasFnAttribute("nvptx-f32ftz"))
111 return (F->getAttributes().getAttribute(AttributeSet::FunctionIndex,
112 "nvptx-f32ftz")
113 .getValueAsString() == "true");
114 else
115 return false;
116 }
117 }
118
119 /// Select - Select instructions not customized! Used for
120 /// expanded, promoted and normal instructions.
Select(SDNode * N)121 SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
122
123 if (N->isMachineOpcode()) {
124 N->setNodeId(-1);
125 return nullptr; // Already selected.
126 }
127
128 SDNode *ResNode = nullptr;
129 switch (N->getOpcode()) {
130 case ISD::LOAD:
131 ResNode = SelectLoad(N);
132 break;
133 case ISD::STORE:
134 ResNode = SelectStore(N);
135 break;
136 case NVPTXISD::LoadV2:
137 case NVPTXISD::LoadV4:
138 ResNode = SelectLoadVector(N);
139 break;
140 case NVPTXISD::LDGV2:
141 case NVPTXISD::LDGV4:
142 case NVPTXISD::LDUV2:
143 case NVPTXISD::LDUV4:
144 ResNode = SelectLDGLDU(N);
145 break;
146 case NVPTXISD::StoreV2:
147 case NVPTXISD::StoreV4:
148 ResNode = SelectStoreVector(N);
149 break;
150 case NVPTXISD::LoadParam:
151 case NVPTXISD::LoadParamV2:
152 case NVPTXISD::LoadParamV4:
153 ResNode = SelectLoadParam(N);
154 break;
155 case NVPTXISD::StoreRetval:
156 case NVPTXISD::StoreRetvalV2:
157 case NVPTXISD::StoreRetvalV4:
158 ResNode = SelectStoreRetval(N);
159 break;
160 case NVPTXISD::StoreParam:
161 case NVPTXISD::StoreParamV2:
162 case NVPTXISD::StoreParamV4:
163 case NVPTXISD::StoreParamS32:
164 case NVPTXISD::StoreParamU32:
165 ResNode = SelectStoreParam(N);
166 break;
167 case ISD::INTRINSIC_WO_CHAIN:
168 ResNode = SelectIntrinsicNoChain(N);
169 break;
170 case ISD::INTRINSIC_W_CHAIN:
171 ResNode = SelectIntrinsicChain(N);
172 break;
173 case NVPTXISD::Tex1DFloatI32:
174 case NVPTXISD::Tex1DFloatFloat:
175 case NVPTXISD::Tex1DFloatFloatLevel:
176 case NVPTXISD::Tex1DFloatFloatGrad:
177 case NVPTXISD::Tex1DI32I32:
178 case NVPTXISD::Tex1DI32Float:
179 case NVPTXISD::Tex1DI32FloatLevel:
180 case NVPTXISD::Tex1DI32FloatGrad:
181 case NVPTXISD::Tex1DArrayFloatI32:
182 case NVPTXISD::Tex1DArrayFloatFloat:
183 case NVPTXISD::Tex1DArrayFloatFloatLevel:
184 case NVPTXISD::Tex1DArrayFloatFloatGrad:
185 case NVPTXISD::Tex1DArrayI32I32:
186 case NVPTXISD::Tex1DArrayI32Float:
187 case NVPTXISD::Tex1DArrayI32FloatLevel:
188 case NVPTXISD::Tex1DArrayI32FloatGrad:
189 case NVPTXISD::Tex2DFloatI32:
190 case NVPTXISD::Tex2DFloatFloat:
191 case NVPTXISD::Tex2DFloatFloatLevel:
192 case NVPTXISD::Tex2DFloatFloatGrad:
193 case NVPTXISD::Tex2DI32I32:
194 case NVPTXISD::Tex2DI32Float:
195 case NVPTXISD::Tex2DI32FloatLevel:
196 case NVPTXISD::Tex2DI32FloatGrad:
197 case NVPTXISD::Tex2DArrayFloatI32:
198 case NVPTXISD::Tex2DArrayFloatFloat:
199 case NVPTXISD::Tex2DArrayFloatFloatLevel:
200 case NVPTXISD::Tex2DArrayFloatFloatGrad:
201 case NVPTXISD::Tex2DArrayI32I32:
202 case NVPTXISD::Tex2DArrayI32Float:
203 case NVPTXISD::Tex2DArrayI32FloatLevel:
204 case NVPTXISD::Tex2DArrayI32FloatGrad:
205 case NVPTXISD::Tex3DFloatI32:
206 case NVPTXISD::Tex3DFloatFloat:
207 case NVPTXISD::Tex3DFloatFloatLevel:
208 case NVPTXISD::Tex3DFloatFloatGrad:
209 case NVPTXISD::Tex3DI32I32:
210 case NVPTXISD::Tex3DI32Float:
211 case NVPTXISD::Tex3DI32FloatLevel:
212 case NVPTXISD::Tex3DI32FloatGrad:
213 ResNode = SelectTextureIntrinsic(N);
214 break;
215 case NVPTXISD::Suld1DI8Trap:
216 case NVPTXISD::Suld1DI16Trap:
217 case NVPTXISD::Suld1DI32Trap:
218 case NVPTXISD::Suld1DV2I8Trap:
219 case NVPTXISD::Suld1DV2I16Trap:
220 case NVPTXISD::Suld1DV2I32Trap:
221 case NVPTXISD::Suld1DV4I8Trap:
222 case NVPTXISD::Suld1DV4I16Trap:
223 case NVPTXISD::Suld1DV4I32Trap:
224 case NVPTXISD::Suld1DArrayI8Trap:
225 case NVPTXISD::Suld1DArrayI16Trap:
226 case NVPTXISD::Suld1DArrayI32Trap:
227 case NVPTXISD::Suld1DArrayV2I8Trap:
228 case NVPTXISD::Suld1DArrayV2I16Trap:
229 case NVPTXISD::Suld1DArrayV2I32Trap:
230 case NVPTXISD::Suld1DArrayV4I8Trap:
231 case NVPTXISD::Suld1DArrayV4I16Trap:
232 case NVPTXISD::Suld1DArrayV4I32Trap:
233 case NVPTXISD::Suld2DI8Trap:
234 case NVPTXISD::Suld2DI16Trap:
235 case NVPTXISD::Suld2DI32Trap:
236 case NVPTXISD::Suld2DV2I8Trap:
237 case NVPTXISD::Suld2DV2I16Trap:
238 case NVPTXISD::Suld2DV2I32Trap:
239 case NVPTXISD::Suld2DV4I8Trap:
240 case NVPTXISD::Suld2DV4I16Trap:
241 case NVPTXISD::Suld2DV4I32Trap:
242 case NVPTXISD::Suld2DArrayI8Trap:
243 case NVPTXISD::Suld2DArrayI16Trap:
244 case NVPTXISD::Suld2DArrayI32Trap:
245 case NVPTXISD::Suld2DArrayV2I8Trap:
246 case NVPTXISD::Suld2DArrayV2I16Trap:
247 case NVPTXISD::Suld2DArrayV2I32Trap:
248 case NVPTXISD::Suld2DArrayV4I8Trap:
249 case NVPTXISD::Suld2DArrayV4I16Trap:
250 case NVPTXISD::Suld2DArrayV4I32Trap:
251 case NVPTXISD::Suld3DI8Trap:
252 case NVPTXISD::Suld3DI16Trap:
253 case NVPTXISD::Suld3DI32Trap:
254 case NVPTXISD::Suld3DV2I8Trap:
255 case NVPTXISD::Suld3DV2I16Trap:
256 case NVPTXISD::Suld3DV2I32Trap:
257 case NVPTXISD::Suld3DV4I8Trap:
258 case NVPTXISD::Suld3DV4I16Trap:
259 case NVPTXISD::Suld3DV4I32Trap:
260 ResNode = SelectSurfaceIntrinsic(N);
261 break;
262 case ISD::AND:
263 case ISD::SRA:
264 case ISD::SRL:
265 // Try to select BFE
266 ResNode = SelectBFE(N);
267 break;
268 case ISD::ADDRSPACECAST:
269 ResNode = SelectAddrSpaceCast(N);
270 break;
271 default:
272 break;
273 }
274 if (ResNode)
275 return ResNode;
276 return SelectCode(N);
277 }
278
SelectIntrinsicChain(SDNode * N)279 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) {
280 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
281 switch (IID) {
282 default:
283 return NULL;
284 case Intrinsic::nvvm_ldg_global_f:
285 case Intrinsic::nvvm_ldg_global_i:
286 case Intrinsic::nvvm_ldg_global_p:
287 case Intrinsic::nvvm_ldu_global_f:
288 case Intrinsic::nvvm_ldu_global_i:
289 case Intrinsic::nvvm_ldu_global_p:
290 return SelectLDGLDU(N);
291 }
292 }
293
getCodeAddrSpace(MemSDNode * N,const NVPTXSubtarget & Subtarget)294 static unsigned int getCodeAddrSpace(MemSDNode *N,
295 const NVPTXSubtarget &Subtarget) {
296 const Value *Src = N->getMemOperand()->getValue();
297
298 if (!Src)
299 return NVPTX::PTXLdStInstCode::GENERIC;
300
301 if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
302 switch (PT->getAddressSpace()) {
303 case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
304 case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
305 case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
306 case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
307 case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
308 case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
309 default: break;
310 }
311 }
312 return NVPTX::PTXLdStInstCode::GENERIC;
313 }
314
SelectIntrinsicNoChain(SDNode * N)315 SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) {
316 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
317 switch (IID) {
318 default:
319 return nullptr;
320 case Intrinsic::nvvm_texsurf_handle_internal:
321 return SelectTexSurfHandle(N);
322 }
323 }
324
SelectTexSurfHandle(SDNode * N)325 SDNode *NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
326 // Op 0 is the intrinsic ID
327 SDValue Wrapper = N->getOperand(1);
328 SDValue GlobalVal = Wrapper.getOperand(0);
329 return CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), MVT::i64,
330 GlobalVal);
331 }
332
SelectAddrSpaceCast(SDNode * N)333 SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
334 SDValue Src = N->getOperand(0);
335 AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
336 unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
337 unsigned DstAddrSpace = CastN->getDestAddressSpace();
338
339 assert(SrcAddrSpace != DstAddrSpace &&
340 "addrspacecast must be between different address spaces");
341
342 if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
343 // Specific to generic
344 unsigned Opc;
345 switch (SrcAddrSpace) {
346 default: report_fatal_error("Bad address space in addrspacecast");
347 case ADDRESS_SPACE_GLOBAL:
348 Opc = Subtarget.is64Bit() ? NVPTX::cvta_global_yes_64
349 : NVPTX::cvta_global_yes;
350 break;
351 case ADDRESS_SPACE_SHARED:
352 Opc = Subtarget.is64Bit() ? NVPTX::cvta_shared_yes_64
353 : NVPTX::cvta_shared_yes;
354 break;
355 case ADDRESS_SPACE_CONST:
356 Opc = Subtarget.is64Bit() ? NVPTX::cvta_const_yes_64
357 : NVPTX::cvta_const_yes;
358 break;
359 case ADDRESS_SPACE_LOCAL:
360 Opc = Subtarget.is64Bit() ? NVPTX::cvta_local_yes_64
361 : NVPTX::cvta_local_yes;
362 break;
363 }
364 return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
365 } else {
366 // Generic to specific
367 if (SrcAddrSpace != 0)
368 report_fatal_error("Cannot cast between two non-generic address spaces");
369 unsigned Opc;
370 switch (DstAddrSpace) {
371 default: report_fatal_error("Bad address space in addrspacecast");
372 case ADDRESS_SPACE_GLOBAL:
373 Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_global_yes_64
374 : NVPTX::cvta_to_global_yes;
375 break;
376 case ADDRESS_SPACE_SHARED:
377 Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_shared_yes_64
378 : NVPTX::cvta_to_shared_yes;
379 break;
380 case ADDRESS_SPACE_CONST:
381 Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_const_yes_64
382 : NVPTX::cvta_to_const_yes;
383 break;
384 case ADDRESS_SPACE_LOCAL:
385 Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_local_yes_64
386 : NVPTX::cvta_to_local_yes;
387 break;
388 }
389 return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
390 }
391 }
392
SelectLoad(SDNode * N)393 SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
394 SDLoc dl(N);
395 LoadSDNode *LD = cast<LoadSDNode>(N);
396 EVT LoadedVT = LD->getMemoryVT();
397 SDNode *NVPTXLD = nullptr;
398
399 // do not support pre/post inc/dec
400 if (LD->isIndexed())
401 return nullptr;
402
403 if (!LoadedVT.isSimple())
404 return nullptr;
405
406 // Address Space Setting
407 unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);
408
409 // Volatile Setting
410 // - .volatile is only availalble for .global and .shared
411 bool isVolatile = LD->isVolatile();
412 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
413 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
414 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
415 isVolatile = false;
416
417 // Vector Setting
418 MVT SimpleVT = LoadedVT.getSimpleVT();
419 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
420 if (SimpleVT.isVector()) {
421 unsigned num = SimpleVT.getVectorNumElements();
422 if (num == 2)
423 vecType = NVPTX::PTXLdStInstCode::V2;
424 else if (num == 4)
425 vecType = NVPTX::PTXLdStInstCode::V4;
426 else
427 return nullptr;
428 }
429
430 // Type Setting: fromType + fromTypeWidth
431 //
432 // Sign : ISD::SEXTLOAD
433 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
434 // type is integer
435 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
436 MVT ScalarVT = SimpleVT.getScalarType();
437 // Read at least 8 bits (predicates are stored as 8-bit values)
438 unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
439 unsigned int fromType;
440 if ((LD->getExtensionType() == ISD::SEXTLOAD))
441 fromType = NVPTX::PTXLdStInstCode::Signed;
442 else if (ScalarVT.isFloatingPoint())
443 fromType = NVPTX::PTXLdStInstCode::Float;
444 else
445 fromType = NVPTX::PTXLdStInstCode::Unsigned;
446
447 // Create the machine instruction DAG
448 SDValue Chain = N->getOperand(0);
449 SDValue N1 = N->getOperand(1);
450 SDValue Addr;
451 SDValue Offset, Base;
452 unsigned Opcode;
453 MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
454
455 if (SelectDirectAddr(N1, Addr)) {
456 switch (TargetVT) {
457 case MVT::i8:
458 Opcode = NVPTX::LD_i8_avar;
459 break;
460 case MVT::i16:
461 Opcode = NVPTX::LD_i16_avar;
462 break;
463 case MVT::i32:
464 Opcode = NVPTX::LD_i32_avar;
465 break;
466 case MVT::i64:
467 Opcode = NVPTX::LD_i64_avar;
468 break;
469 case MVT::f32:
470 Opcode = NVPTX::LD_f32_avar;
471 break;
472 case MVT::f64:
473 Opcode = NVPTX::LD_f64_avar;
474 break;
475 default:
476 return nullptr;
477 }
478 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
479 getI32Imm(vecType), getI32Imm(fromType),
480 getI32Imm(fromTypeWidth), Addr, Chain };
481 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
482 } else if (Subtarget.is64Bit()
483 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
484 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
485 switch (TargetVT) {
486 case MVT::i8:
487 Opcode = NVPTX::LD_i8_asi;
488 break;
489 case MVT::i16:
490 Opcode = NVPTX::LD_i16_asi;
491 break;
492 case MVT::i32:
493 Opcode = NVPTX::LD_i32_asi;
494 break;
495 case MVT::i64:
496 Opcode = NVPTX::LD_i64_asi;
497 break;
498 case MVT::f32:
499 Opcode = NVPTX::LD_f32_asi;
500 break;
501 case MVT::f64:
502 Opcode = NVPTX::LD_f64_asi;
503 break;
504 default:
505 return nullptr;
506 }
507 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
508 getI32Imm(vecType), getI32Imm(fromType),
509 getI32Imm(fromTypeWidth), Base, Offset, Chain };
510 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
511 } else if (Subtarget.is64Bit()
512 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
513 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
514 if (Subtarget.is64Bit()) {
515 switch (TargetVT) {
516 case MVT::i8:
517 Opcode = NVPTX::LD_i8_ari_64;
518 break;
519 case MVT::i16:
520 Opcode = NVPTX::LD_i16_ari_64;
521 break;
522 case MVT::i32:
523 Opcode = NVPTX::LD_i32_ari_64;
524 break;
525 case MVT::i64:
526 Opcode = NVPTX::LD_i64_ari_64;
527 break;
528 case MVT::f32:
529 Opcode = NVPTX::LD_f32_ari_64;
530 break;
531 case MVT::f64:
532 Opcode = NVPTX::LD_f64_ari_64;
533 break;
534 default:
535 return nullptr;
536 }
537 } else {
538 switch (TargetVT) {
539 case MVT::i8:
540 Opcode = NVPTX::LD_i8_ari;
541 break;
542 case MVT::i16:
543 Opcode = NVPTX::LD_i16_ari;
544 break;
545 case MVT::i32:
546 Opcode = NVPTX::LD_i32_ari;
547 break;
548 case MVT::i64:
549 Opcode = NVPTX::LD_i64_ari;
550 break;
551 case MVT::f32:
552 Opcode = NVPTX::LD_f32_ari;
553 break;
554 case MVT::f64:
555 Opcode = NVPTX::LD_f64_ari;
556 break;
557 default:
558 return nullptr;
559 }
560 }
561 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
562 getI32Imm(vecType), getI32Imm(fromType),
563 getI32Imm(fromTypeWidth), Base, Offset, Chain };
564 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
565 } else {
566 if (Subtarget.is64Bit()) {
567 switch (TargetVT) {
568 case MVT::i8:
569 Opcode = NVPTX::LD_i8_areg_64;
570 break;
571 case MVT::i16:
572 Opcode = NVPTX::LD_i16_areg_64;
573 break;
574 case MVT::i32:
575 Opcode = NVPTX::LD_i32_areg_64;
576 break;
577 case MVT::i64:
578 Opcode = NVPTX::LD_i64_areg_64;
579 break;
580 case MVT::f32:
581 Opcode = NVPTX::LD_f32_areg_64;
582 break;
583 case MVT::f64:
584 Opcode = NVPTX::LD_f64_areg_64;
585 break;
586 default:
587 return nullptr;
588 }
589 } else {
590 switch (TargetVT) {
591 case MVT::i8:
592 Opcode = NVPTX::LD_i8_areg;
593 break;
594 case MVT::i16:
595 Opcode = NVPTX::LD_i16_areg;
596 break;
597 case MVT::i32:
598 Opcode = NVPTX::LD_i32_areg;
599 break;
600 case MVT::i64:
601 Opcode = NVPTX::LD_i64_areg;
602 break;
603 case MVT::f32:
604 Opcode = NVPTX::LD_f32_areg;
605 break;
606 case MVT::f64:
607 Opcode = NVPTX::LD_f64_areg;
608 break;
609 default:
610 return nullptr;
611 }
612 }
613 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
614 getI32Imm(vecType), getI32Imm(fromType),
615 getI32Imm(fromTypeWidth), N1, Chain };
616 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
617 }
618
619 if (NVPTXLD) {
620 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
621 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
622 cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
623 }
624
625 return NVPTXLD;
626 }
627
SelectLoadVector(SDNode * N)628 SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
629
630 SDValue Chain = N->getOperand(0);
631 SDValue Op1 = N->getOperand(1);
632 SDValue Addr, Offset, Base;
633 unsigned Opcode;
634 SDLoc DL(N);
635 SDNode *LD;
636 MemSDNode *MemSD = cast<MemSDNode>(N);
637 EVT LoadedVT = MemSD->getMemoryVT();
638
639 if (!LoadedVT.isSimple())
640 return nullptr;
641
642 // Address Space Setting
643 unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
644
645 // Volatile Setting
646 // - .volatile is only availalble for .global and .shared
647 bool IsVolatile = MemSD->isVolatile();
648 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
649 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
650 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
651 IsVolatile = false;
652
653 // Vector Setting
654 MVT SimpleVT = LoadedVT.getSimpleVT();
655
656 // Type Setting: fromType + fromTypeWidth
657 //
658 // Sign : ISD::SEXTLOAD
659 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
660 // type is integer
661 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
662 MVT ScalarVT = SimpleVT.getScalarType();
663 // Read at least 8 bits (predicates are stored as 8-bit values)
664 unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
665 unsigned int FromType;
666 // The last operand holds the original LoadSDNode::getExtensionType() value
667 unsigned ExtensionType = cast<ConstantSDNode>(
668 N->getOperand(N->getNumOperands() - 1))->getZExtValue();
669 if (ExtensionType == ISD::SEXTLOAD)
670 FromType = NVPTX::PTXLdStInstCode::Signed;
671 else if (ScalarVT.isFloatingPoint())
672 FromType = NVPTX::PTXLdStInstCode::Float;
673 else
674 FromType = NVPTX::PTXLdStInstCode::Unsigned;
675
676 unsigned VecType;
677
678 switch (N->getOpcode()) {
679 case NVPTXISD::LoadV2:
680 VecType = NVPTX::PTXLdStInstCode::V2;
681 break;
682 case NVPTXISD::LoadV4:
683 VecType = NVPTX::PTXLdStInstCode::V4;
684 break;
685 default:
686 return nullptr;
687 }
688
689 EVT EltVT = N->getValueType(0);
690
691 if (SelectDirectAddr(Op1, Addr)) {
692 switch (N->getOpcode()) {
693 default:
694 return nullptr;
695 case NVPTXISD::LoadV2:
696 switch (EltVT.getSimpleVT().SimpleTy) {
697 default:
698 return nullptr;
699 case MVT::i8:
700 Opcode = NVPTX::LDV_i8_v2_avar;
701 break;
702 case MVT::i16:
703 Opcode = NVPTX::LDV_i16_v2_avar;
704 break;
705 case MVT::i32:
706 Opcode = NVPTX::LDV_i32_v2_avar;
707 break;
708 case MVT::i64:
709 Opcode = NVPTX::LDV_i64_v2_avar;
710 break;
711 case MVT::f32:
712 Opcode = NVPTX::LDV_f32_v2_avar;
713 break;
714 case MVT::f64:
715 Opcode = NVPTX::LDV_f64_v2_avar;
716 break;
717 }
718 break;
719 case NVPTXISD::LoadV4:
720 switch (EltVT.getSimpleVT().SimpleTy) {
721 default:
722 return nullptr;
723 case MVT::i8:
724 Opcode = NVPTX::LDV_i8_v4_avar;
725 break;
726 case MVT::i16:
727 Opcode = NVPTX::LDV_i16_v4_avar;
728 break;
729 case MVT::i32:
730 Opcode = NVPTX::LDV_i32_v4_avar;
731 break;
732 case MVT::f32:
733 Opcode = NVPTX::LDV_f32_v4_avar;
734 break;
735 }
736 break;
737 }
738
739 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
740 getI32Imm(VecType), getI32Imm(FromType),
741 getI32Imm(FromTypeWidth), Addr, Chain };
742 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
743 } else if (Subtarget.is64Bit()
744 ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
745 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
746 switch (N->getOpcode()) {
747 default:
748 return nullptr;
749 case NVPTXISD::LoadV2:
750 switch (EltVT.getSimpleVT().SimpleTy) {
751 default:
752 return nullptr;
753 case MVT::i8:
754 Opcode = NVPTX::LDV_i8_v2_asi;
755 break;
756 case MVT::i16:
757 Opcode = NVPTX::LDV_i16_v2_asi;
758 break;
759 case MVT::i32:
760 Opcode = NVPTX::LDV_i32_v2_asi;
761 break;
762 case MVT::i64:
763 Opcode = NVPTX::LDV_i64_v2_asi;
764 break;
765 case MVT::f32:
766 Opcode = NVPTX::LDV_f32_v2_asi;
767 break;
768 case MVT::f64:
769 Opcode = NVPTX::LDV_f64_v2_asi;
770 break;
771 }
772 break;
773 case NVPTXISD::LoadV4:
774 switch (EltVT.getSimpleVT().SimpleTy) {
775 default:
776 return nullptr;
777 case MVT::i8:
778 Opcode = NVPTX::LDV_i8_v4_asi;
779 break;
780 case MVT::i16:
781 Opcode = NVPTX::LDV_i16_v4_asi;
782 break;
783 case MVT::i32:
784 Opcode = NVPTX::LDV_i32_v4_asi;
785 break;
786 case MVT::f32:
787 Opcode = NVPTX::LDV_f32_v4_asi;
788 break;
789 }
790 break;
791 }
792
793 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
794 getI32Imm(VecType), getI32Imm(FromType),
795 getI32Imm(FromTypeWidth), Base, Offset, Chain };
796 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
797 } else if (Subtarget.is64Bit()
798 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
799 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
800 if (Subtarget.is64Bit()) {
801 switch (N->getOpcode()) {
802 default:
803 return nullptr;
804 case NVPTXISD::LoadV2:
805 switch (EltVT.getSimpleVT().SimpleTy) {
806 default:
807 return nullptr;
808 case MVT::i8:
809 Opcode = NVPTX::LDV_i8_v2_ari_64;
810 break;
811 case MVT::i16:
812 Opcode = NVPTX::LDV_i16_v2_ari_64;
813 break;
814 case MVT::i32:
815 Opcode = NVPTX::LDV_i32_v2_ari_64;
816 break;
817 case MVT::i64:
818 Opcode = NVPTX::LDV_i64_v2_ari_64;
819 break;
820 case MVT::f32:
821 Opcode = NVPTX::LDV_f32_v2_ari_64;
822 break;
823 case MVT::f64:
824 Opcode = NVPTX::LDV_f64_v2_ari_64;
825 break;
826 }
827 break;
828 case NVPTXISD::LoadV4:
829 switch (EltVT.getSimpleVT().SimpleTy) {
830 default:
831 return nullptr;
832 case MVT::i8:
833 Opcode = NVPTX::LDV_i8_v4_ari_64;
834 break;
835 case MVT::i16:
836 Opcode = NVPTX::LDV_i16_v4_ari_64;
837 break;
838 case MVT::i32:
839 Opcode = NVPTX::LDV_i32_v4_ari_64;
840 break;
841 case MVT::f32:
842 Opcode = NVPTX::LDV_f32_v4_ari_64;
843 break;
844 }
845 break;
846 }
847 } else {
848 switch (N->getOpcode()) {
849 default:
850 return nullptr;
851 case NVPTXISD::LoadV2:
852 switch (EltVT.getSimpleVT().SimpleTy) {
853 default:
854 return nullptr;
855 case MVT::i8:
856 Opcode = NVPTX::LDV_i8_v2_ari;
857 break;
858 case MVT::i16:
859 Opcode = NVPTX::LDV_i16_v2_ari;
860 break;
861 case MVT::i32:
862 Opcode = NVPTX::LDV_i32_v2_ari;
863 break;
864 case MVT::i64:
865 Opcode = NVPTX::LDV_i64_v2_ari;
866 break;
867 case MVT::f32:
868 Opcode = NVPTX::LDV_f32_v2_ari;
869 break;
870 case MVT::f64:
871 Opcode = NVPTX::LDV_f64_v2_ari;
872 break;
873 }
874 break;
875 case NVPTXISD::LoadV4:
876 switch (EltVT.getSimpleVT().SimpleTy) {
877 default:
878 return nullptr;
879 case MVT::i8:
880 Opcode = NVPTX::LDV_i8_v4_ari;
881 break;
882 case MVT::i16:
883 Opcode = NVPTX::LDV_i16_v4_ari;
884 break;
885 case MVT::i32:
886 Opcode = NVPTX::LDV_i32_v4_ari;
887 break;
888 case MVT::f32:
889 Opcode = NVPTX::LDV_f32_v4_ari;
890 break;
891 }
892 break;
893 }
894 }
895
896 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
897 getI32Imm(VecType), getI32Imm(FromType),
898 getI32Imm(FromTypeWidth), Base, Offset, Chain };
899
900 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
901 } else {
902 if (Subtarget.is64Bit()) {
903 switch (N->getOpcode()) {
904 default:
905 return nullptr;
906 case NVPTXISD::LoadV2:
907 switch (EltVT.getSimpleVT().SimpleTy) {
908 default:
909 return nullptr;
910 case MVT::i8:
911 Opcode = NVPTX::LDV_i8_v2_areg_64;
912 break;
913 case MVT::i16:
914 Opcode = NVPTX::LDV_i16_v2_areg_64;
915 break;
916 case MVT::i32:
917 Opcode = NVPTX::LDV_i32_v2_areg_64;
918 break;
919 case MVT::i64:
920 Opcode = NVPTX::LDV_i64_v2_areg_64;
921 break;
922 case MVT::f32:
923 Opcode = NVPTX::LDV_f32_v2_areg_64;
924 break;
925 case MVT::f64:
926 Opcode = NVPTX::LDV_f64_v2_areg_64;
927 break;
928 }
929 break;
930 case NVPTXISD::LoadV4:
931 switch (EltVT.getSimpleVT().SimpleTy) {
932 default:
933 return nullptr;
934 case MVT::i8:
935 Opcode = NVPTX::LDV_i8_v4_areg_64;
936 break;
937 case MVT::i16:
938 Opcode = NVPTX::LDV_i16_v4_areg_64;
939 break;
940 case MVT::i32:
941 Opcode = NVPTX::LDV_i32_v4_areg_64;
942 break;
943 case MVT::f32:
944 Opcode = NVPTX::LDV_f32_v4_areg_64;
945 break;
946 }
947 break;
948 }
949 } else {
950 switch (N->getOpcode()) {
951 default:
952 return nullptr;
953 case NVPTXISD::LoadV2:
954 switch (EltVT.getSimpleVT().SimpleTy) {
955 default:
956 return nullptr;
957 case MVT::i8:
958 Opcode = NVPTX::LDV_i8_v2_areg;
959 break;
960 case MVT::i16:
961 Opcode = NVPTX::LDV_i16_v2_areg;
962 break;
963 case MVT::i32:
964 Opcode = NVPTX::LDV_i32_v2_areg;
965 break;
966 case MVT::i64:
967 Opcode = NVPTX::LDV_i64_v2_areg;
968 break;
969 case MVT::f32:
970 Opcode = NVPTX::LDV_f32_v2_areg;
971 break;
972 case MVT::f64:
973 Opcode = NVPTX::LDV_f64_v2_areg;
974 break;
975 }
976 break;
977 case NVPTXISD::LoadV4:
978 switch (EltVT.getSimpleVT().SimpleTy) {
979 default:
980 return nullptr;
981 case MVT::i8:
982 Opcode = NVPTX::LDV_i8_v4_areg;
983 break;
984 case MVT::i16:
985 Opcode = NVPTX::LDV_i16_v4_areg;
986 break;
987 case MVT::i32:
988 Opcode = NVPTX::LDV_i32_v4_areg;
989 break;
990 case MVT::f32:
991 Opcode = NVPTX::LDV_f32_v4_areg;
992 break;
993 }
994 break;
995 }
996 }
997
998 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
999 getI32Imm(VecType), getI32Imm(FromType),
1000 getI32Imm(FromTypeWidth), Op1, Chain };
1001 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1002 }
1003
1004 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1005 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1006 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1007
1008 return LD;
1009 }
1010
SelectLDGLDU(SDNode * N)1011 SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
1012
1013 SDValue Chain = N->getOperand(0);
1014 SDValue Op1;
1015 MemSDNode *Mem;
1016 bool IsLDG = true;
1017
1018 // If this is an LDG intrinsic, the address is the third operand. Its its an
1019 // LDG/LDU SD node (from custom vector handling), then its the second operand
1020 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1021 Op1 = N->getOperand(2);
1022 Mem = cast<MemIntrinsicSDNode>(N);
1023 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1024 switch (IID) {
1025 default:
1026 return NULL;
1027 case Intrinsic::nvvm_ldg_global_f:
1028 case Intrinsic::nvvm_ldg_global_i:
1029 case Intrinsic::nvvm_ldg_global_p:
1030 IsLDG = true;
1031 break;
1032 case Intrinsic::nvvm_ldu_global_f:
1033 case Intrinsic::nvvm_ldu_global_i:
1034 case Intrinsic::nvvm_ldu_global_p:
1035 IsLDG = false;
1036 break;
1037 }
1038 } else {
1039 Op1 = N->getOperand(1);
1040 Mem = cast<MemSDNode>(N);
1041 }
1042
1043 unsigned Opcode;
1044 SDLoc DL(N);
1045 SDNode *LD;
1046 SDValue Base, Offset, Addr;
1047
1048 EVT EltVT = Mem->getMemoryVT();
1049 if (EltVT.isVector()) {
1050 EltVT = EltVT.getVectorElementType();
1051 }
1052
1053 if (SelectDirectAddr(Op1, Addr)) {
1054 switch (N->getOpcode()) {
1055 default:
1056 return nullptr;
1057 case ISD::INTRINSIC_W_CHAIN:
1058 if (IsLDG) {
1059 switch (EltVT.getSimpleVT().SimpleTy) {
1060 default:
1061 return nullptr;
1062 case MVT::i8:
1063 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
1064 break;
1065 case MVT::i16:
1066 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
1067 break;
1068 case MVT::i32:
1069 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
1070 break;
1071 case MVT::i64:
1072 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
1073 break;
1074 case MVT::f32:
1075 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
1076 break;
1077 case MVT::f64:
1078 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
1079 break;
1080 }
1081 } else {
1082 switch (EltVT.getSimpleVT().SimpleTy) {
1083 default:
1084 return nullptr;
1085 case MVT::i8:
1086 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
1087 break;
1088 case MVT::i16:
1089 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
1090 break;
1091 case MVT::i32:
1092 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
1093 break;
1094 case MVT::i64:
1095 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
1096 break;
1097 case MVT::f32:
1098 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
1099 break;
1100 case MVT::f64:
1101 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
1102 break;
1103 }
1104 }
1105 break;
1106 case NVPTXISD::LDGV2:
1107 switch (EltVT.getSimpleVT().SimpleTy) {
1108 default:
1109 return nullptr;
1110 case MVT::i8:
1111 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
1112 break;
1113 case MVT::i16:
1114 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
1115 break;
1116 case MVT::i32:
1117 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
1118 break;
1119 case MVT::i64:
1120 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
1121 break;
1122 case MVT::f32:
1123 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
1124 break;
1125 case MVT::f64:
1126 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
1127 break;
1128 }
1129 break;
1130 case NVPTXISD::LDUV2:
1131 switch (EltVT.getSimpleVT().SimpleTy) {
1132 default:
1133 return nullptr;
1134 case MVT::i8:
1135 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
1136 break;
1137 case MVT::i16:
1138 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
1139 break;
1140 case MVT::i32:
1141 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
1142 break;
1143 case MVT::i64:
1144 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
1145 break;
1146 case MVT::f32:
1147 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
1148 break;
1149 case MVT::f64:
1150 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
1151 break;
1152 }
1153 break;
1154 case NVPTXISD::LDGV4:
1155 switch (EltVT.getSimpleVT().SimpleTy) {
1156 default:
1157 return nullptr;
1158 case MVT::i8:
1159 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
1160 break;
1161 case MVT::i16:
1162 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
1163 break;
1164 case MVT::i32:
1165 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
1166 break;
1167 case MVT::f32:
1168 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
1169 break;
1170 }
1171 break;
1172 case NVPTXISD::LDUV4:
1173 switch (EltVT.getSimpleVT().SimpleTy) {
1174 default:
1175 return nullptr;
1176 case MVT::i8:
1177 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
1178 break;
1179 case MVT::i16:
1180 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
1181 break;
1182 case MVT::i32:
1183 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
1184 break;
1185 case MVT::f32:
1186 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
1187 break;
1188 }
1189 break;
1190 }
1191
1192 SDValue Ops[] = { Addr, Chain };
1193 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1194 } else if (Subtarget.is64Bit()
1195 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1196 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1197 if (Subtarget.is64Bit()) {
1198 switch (N->getOpcode()) {
1199 default:
1200 return nullptr;
1201 case ISD::INTRINSIC_W_CHAIN:
1202 if (IsLDG) {
1203 switch (EltVT.getSimpleVT().SimpleTy) {
1204 default:
1205 return nullptr;
1206 case MVT::i8:
1207 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
1208 break;
1209 case MVT::i16:
1210 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
1211 break;
1212 case MVT::i32:
1213 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
1214 break;
1215 case MVT::i64:
1216 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
1217 break;
1218 case MVT::f32:
1219 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
1220 break;
1221 case MVT::f64:
1222 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
1223 break;
1224 }
1225 } else {
1226 switch (EltVT.getSimpleVT().SimpleTy) {
1227 default:
1228 return nullptr;
1229 case MVT::i8:
1230 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
1231 break;
1232 case MVT::i16:
1233 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
1234 break;
1235 case MVT::i32:
1236 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
1237 break;
1238 case MVT::i64:
1239 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
1240 break;
1241 case MVT::f32:
1242 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
1243 break;
1244 case MVT::f64:
1245 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
1246 break;
1247 }
1248 }
1249 break;
1250 case NVPTXISD::LDGV2:
1251 switch (EltVT.getSimpleVT().SimpleTy) {
1252 default:
1253 return nullptr;
1254 case MVT::i8:
1255 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
1256 break;
1257 case MVT::i16:
1258 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
1259 break;
1260 case MVT::i32:
1261 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
1262 break;
1263 case MVT::i64:
1264 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
1265 break;
1266 case MVT::f32:
1267 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
1268 break;
1269 case MVT::f64:
1270 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
1271 break;
1272 }
1273 break;
1274 case NVPTXISD::LDUV2:
1275 switch (EltVT.getSimpleVT().SimpleTy) {
1276 default:
1277 return nullptr;
1278 case MVT::i8:
1279 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
1280 break;
1281 case MVT::i16:
1282 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
1283 break;
1284 case MVT::i32:
1285 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
1286 break;
1287 case MVT::i64:
1288 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
1289 break;
1290 case MVT::f32:
1291 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
1292 break;
1293 case MVT::f64:
1294 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
1295 break;
1296 }
1297 break;
1298 case NVPTXISD::LDGV4:
1299 switch (EltVT.getSimpleVT().SimpleTy) {
1300 default:
1301 return nullptr;
1302 case MVT::i8:
1303 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
1304 break;
1305 case MVT::i16:
1306 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
1307 break;
1308 case MVT::i32:
1309 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
1310 break;
1311 case MVT::f32:
1312 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
1313 break;
1314 }
1315 break;
1316 case NVPTXISD::LDUV4:
1317 switch (EltVT.getSimpleVT().SimpleTy) {
1318 default:
1319 return nullptr;
1320 case MVT::i8:
1321 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
1322 break;
1323 case MVT::i16:
1324 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
1325 break;
1326 case MVT::i32:
1327 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
1328 break;
1329 case MVT::f32:
1330 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
1331 break;
1332 }
1333 break;
1334 }
1335 } else {
1336 switch (N->getOpcode()) {
1337 default:
1338 return nullptr;
1339 case ISD::INTRINSIC_W_CHAIN:
1340 if (IsLDG) {
1341 switch (EltVT.getSimpleVT().SimpleTy) {
1342 default:
1343 return nullptr;
1344 case MVT::i8:
1345 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
1346 break;
1347 case MVT::i16:
1348 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
1349 break;
1350 case MVT::i32:
1351 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
1352 break;
1353 case MVT::i64:
1354 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
1355 break;
1356 case MVT::f32:
1357 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
1358 break;
1359 case MVT::f64:
1360 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
1361 break;
1362 }
1363 } else {
1364 switch (EltVT.getSimpleVT().SimpleTy) {
1365 default:
1366 return nullptr;
1367 case MVT::i8:
1368 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
1369 break;
1370 case MVT::i16:
1371 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
1372 break;
1373 case MVT::i32:
1374 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
1375 break;
1376 case MVT::i64:
1377 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
1378 break;
1379 case MVT::f32:
1380 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
1381 break;
1382 case MVT::f64:
1383 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
1384 break;
1385 }
1386 }
1387 break;
1388 case NVPTXISD::LDGV2:
1389 switch (EltVT.getSimpleVT().SimpleTy) {
1390 default:
1391 return nullptr;
1392 case MVT::i8:
1393 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
1394 break;
1395 case MVT::i16:
1396 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
1397 break;
1398 case MVT::i32:
1399 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
1400 break;
1401 case MVT::i64:
1402 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
1403 break;
1404 case MVT::f32:
1405 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
1406 break;
1407 case MVT::f64:
1408 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
1409 break;
1410 }
1411 break;
1412 case NVPTXISD::LDUV2:
1413 switch (EltVT.getSimpleVT().SimpleTy) {
1414 default:
1415 return nullptr;
1416 case MVT::i8:
1417 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
1418 break;
1419 case MVT::i16:
1420 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
1421 break;
1422 case MVT::i32:
1423 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
1424 break;
1425 case MVT::i64:
1426 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
1427 break;
1428 case MVT::f32:
1429 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
1430 break;
1431 case MVT::f64:
1432 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
1433 break;
1434 }
1435 break;
1436 case NVPTXISD::LDGV4:
1437 switch (EltVT.getSimpleVT().SimpleTy) {
1438 default:
1439 return nullptr;
1440 case MVT::i8:
1441 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
1442 break;
1443 case MVT::i16:
1444 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
1445 break;
1446 case MVT::i32:
1447 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
1448 break;
1449 case MVT::f32:
1450 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
1451 break;
1452 }
1453 break;
1454 case NVPTXISD::LDUV4:
1455 switch (EltVT.getSimpleVT().SimpleTy) {
1456 default:
1457 return nullptr;
1458 case MVT::i8:
1459 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
1460 break;
1461 case MVT::i16:
1462 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
1463 break;
1464 case MVT::i32:
1465 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
1466 break;
1467 case MVT::f32:
1468 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
1469 break;
1470 }
1471 break;
1472 }
1473 }
1474
1475 SDValue Ops[] = { Base, Offset, Chain };
1476
1477 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1478 } else {
1479 if (Subtarget.is64Bit()) {
1480 switch (N->getOpcode()) {
1481 default:
1482 return nullptr;
1483 case ISD::INTRINSIC_W_CHAIN:
1484 if (IsLDG) {
1485 switch (EltVT.getSimpleVT().SimpleTy) {
1486 default:
1487 return nullptr;
1488 case MVT::i8:
1489 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
1490 break;
1491 case MVT::i16:
1492 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
1493 break;
1494 case MVT::i32:
1495 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
1496 break;
1497 case MVT::i64:
1498 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
1499 break;
1500 case MVT::f32:
1501 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
1502 break;
1503 case MVT::f64:
1504 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
1505 break;
1506 }
1507 } else {
1508 switch (EltVT.getSimpleVT().SimpleTy) {
1509 default:
1510 return nullptr;
1511 case MVT::i8:
1512 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
1513 break;
1514 case MVT::i16:
1515 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
1516 break;
1517 case MVT::i32:
1518 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
1519 break;
1520 case MVT::i64:
1521 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
1522 break;
1523 case MVT::f32:
1524 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
1525 break;
1526 case MVT::f64:
1527 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
1528 break;
1529 }
1530 }
1531 break;
1532 case NVPTXISD::LDGV2:
1533 switch (EltVT.getSimpleVT().SimpleTy) {
1534 default:
1535 return nullptr;
1536 case MVT::i8:
1537 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
1538 break;
1539 case MVT::i16:
1540 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
1541 break;
1542 case MVT::i32:
1543 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
1544 break;
1545 case MVT::i64:
1546 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
1547 break;
1548 case MVT::f32:
1549 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
1550 break;
1551 case MVT::f64:
1552 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
1553 break;
1554 }
1555 break;
1556 case NVPTXISD::LDUV2:
1557 switch (EltVT.getSimpleVT().SimpleTy) {
1558 default:
1559 return nullptr;
1560 case MVT::i8:
1561 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
1562 break;
1563 case MVT::i16:
1564 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
1565 break;
1566 case MVT::i32:
1567 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
1568 break;
1569 case MVT::i64:
1570 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
1571 break;
1572 case MVT::f32:
1573 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
1574 break;
1575 case MVT::f64:
1576 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
1577 break;
1578 }
1579 break;
1580 case NVPTXISD::LDGV4:
1581 switch (EltVT.getSimpleVT().SimpleTy) {
1582 default:
1583 return nullptr;
1584 case MVT::i8:
1585 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
1586 break;
1587 case MVT::i16:
1588 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
1589 break;
1590 case MVT::i32:
1591 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
1592 break;
1593 case MVT::f32:
1594 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
1595 break;
1596 }
1597 break;
1598 case NVPTXISD::LDUV4:
1599 switch (EltVT.getSimpleVT().SimpleTy) {
1600 default:
1601 return nullptr;
1602 case MVT::i8:
1603 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
1604 break;
1605 case MVT::i16:
1606 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
1607 break;
1608 case MVT::i32:
1609 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
1610 break;
1611 case MVT::f32:
1612 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
1613 break;
1614 }
1615 break;
1616 }
1617 } else {
1618 switch (N->getOpcode()) {
1619 default:
1620 return nullptr;
1621 case ISD::INTRINSIC_W_CHAIN:
1622 if (IsLDG) {
1623 switch (EltVT.getSimpleVT().SimpleTy) {
1624 default:
1625 return nullptr;
1626 case MVT::i8:
1627 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
1628 break;
1629 case MVT::i16:
1630 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
1631 break;
1632 case MVT::i32:
1633 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
1634 break;
1635 case MVT::i64:
1636 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
1637 break;
1638 case MVT::f32:
1639 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
1640 break;
1641 case MVT::f64:
1642 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
1643 break;
1644 }
1645 } else {
1646 switch (EltVT.getSimpleVT().SimpleTy) {
1647 default:
1648 return nullptr;
1649 case MVT::i8:
1650 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
1651 break;
1652 case MVT::i16:
1653 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
1654 break;
1655 case MVT::i32:
1656 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
1657 break;
1658 case MVT::i64:
1659 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
1660 break;
1661 case MVT::f32:
1662 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
1663 break;
1664 case MVT::f64:
1665 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
1666 break;
1667 }
1668 }
1669 break;
1670 case NVPTXISD::LDGV2:
1671 switch (EltVT.getSimpleVT().SimpleTy) {
1672 default:
1673 return nullptr;
1674 case MVT::i8:
1675 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
1676 break;
1677 case MVT::i16:
1678 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
1679 break;
1680 case MVT::i32:
1681 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
1682 break;
1683 case MVT::i64:
1684 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
1685 break;
1686 case MVT::f32:
1687 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
1688 break;
1689 case MVT::f64:
1690 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
1691 break;
1692 }
1693 break;
1694 case NVPTXISD::LDUV2:
1695 switch (EltVT.getSimpleVT().SimpleTy) {
1696 default:
1697 return nullptr;
1698 case MVT::i8:
1699 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
1700 break;
1701 case MVT::i16:
1702 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
1703 break;
1704 case MVT::i32:
1705 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
1706 break;
1707 case MVT::i64:
1708 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
1709 break;
1710 case MVT::f32:
1711 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
1712 break;
1713 case MVT::f64:
1714 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
1715 break;
1716 }
1717 break;
1718 case NVPTXISD::LDGV4:
1719 switch (EltVT.getSimpleVT().SimpleTy) {
1720 default:
1721 return nullptr;
1722 case MVT::i8:
1723 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
1724 break;
1725 case MVT::i16:
1726 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
1727 break;
1728 case MVT::i32:
1729 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
1730 break;
1731 case MVT::f32:
1732 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
1733 break;
1734 }
1735 break;
1736 case NVPTXISD::LDUV4:
1737 switch (EltVT.getSimpleVT().SimpleTy) {
1738 default:
1739 return nullptr;
1740 case MVT::i8:
1741 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
1742 break;
1743 case MVT::i16:
1744 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
1745 break;
1746 case MVT::i32:
1747 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
1748 break;
1749 case MVT::f32:
1750 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
1751 break;
1752 }
1753 break;
1754 }
1755 }
1756
1757 SDValue Ops[] = { Op1, Chain };
1758 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
1759 }
1760
1761 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1762 MemRefs0[0] = Mem->getMemOperand();
1763 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1764
1765 return LD;
1766 }
1767
SelectStore(SDNode * N)1768 SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
1769 SDLoc dl(N);
1770 StoreSDNode *ST = cast<StoreSDNode>(N);
1771 EVT StoreVT = ST->getMemoryVT();
1772 SDNode *NVPTXST = nullptr;
1773
1774 // do not support pre/post inc/dec
1775 if (ST->isIndexed())
1776 return nullptr;
1777
1778 if (!StoreVT.isSimple())
1779 return nullptr;
1780
1781 // Address Space Setting
1782 unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);
1783
1784 // Volatile Setting
1785 // - .volatile is only availalble for .global and .shared
1786 bool isVolatile = ST->isVolatile();
1787 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1788 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1789 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1790 isVolatile = false;
1791
1792 // Vector Setting
1793 MVT SimpleVT = StoreVT.getSimpleVT();
1794 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
1795 if (SimpleVT.isVector()) {
1796 unsigned num = SimpleVT.getVectorNumElements();
1797 if (num == 2)
1798 vecType = NVPTX::PTXLdStInstCode::V2;
1799 else if (num == 4)
1800 vecType = NVPTX::PTXLdStInstCode::V4;
1801 else
1802 return nullptr;
1803 }
1804
1805 // Type Setting: toType + toTypeWidth
1806 // - for integer type, always use 'u'
1807 //
1808 MVT ScalarVT = SimpleVT.getScalarType();
1809 unsigned toTypeWidth = ScalarVT.getSizeInBits();
1810 unsigned int toType;
1811 if (ScalarVT.isFloatingPoint())
1812 toType = NVPTX::PTXLdStInstCode::Float;
1813 else
1814 toType = NVPTX::PTXLdStInstCode::Unsigned;
1815
1816 // Create the machine instruction DAG
1817 SDValue Chain = N->getOperand(0);
1818 SDValue N1 = N->getOperand(1);
1819 SDValue N2 = N->getOperand(2);
1820 SDValue Addr;
1821 SDValue Offset, Base;
1822 unsigned Opcode;
1823 MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
1824
1825 if (SelectDirectAddr(N2, Addr)) {
1826 switch (SourceVT) {
1827 case MVT::i8:
1828 Opcode = NVPTX::ST_i8_avar;
1829 break;
1830 case MVT::i16:
1831 Opcode = NVPTX::ST_i16_avar;
1832 break;
1833 case MVT::i32:
1834 Opcode = NVPTX::ST_i32_avar;
1835 break;
1836 case MVT::i64:
1837 Opcode = NVPTX::ST_i64_avar;
1838 break;
1839 case MVT::f32:
1840 Opcode = NVPTX::ST_f32_avar;
1841 break;
1842 case MVT::f64:
1843 Opcode = NVPTX::ST_f64_avar;
1844 break;
1845 default:
1846 return nullptr;
1847 }
1848 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1849 getI32Imm(vecType), getI32Imm(toType),
1850 getI32Imm(toTypeWidth), Addr, Chain };
1851 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
1852 } else if (Subtarget.is64Bit()
1853 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1854 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
1855 switch (SourceVT) {
1856 case MVT::i8:
1857 Opcode = NVPTX::ST_i8_asi;
1858 break;
1859 case MVT::i16:
1860 Opcode = NVPTX::ST_i16_asi;
1861 break;
1862 case MVT::i32:
1863 Opcode = NVPTX::ST_i32_asi;
1864 break;
1865 case MVT::i64:
1866 Opcode = NVPTX::ST_i64_asi;
1867 break;
1868 case MVT::f32:
1869 Opcode = NVPTX::ST_f32_asi;
1870 break;
1871 case MVT::f64:
1872 Opcode = NVPTX::ST_f64_asi;
1873 break;
1874 default:
1875 return nullptr;
1876 }
1877 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1878 getI32Imm(vecType), getI32Imm(toType),
1879 getI32Imm(toTypeWidth), Base, Offset, Chain };
1880 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
1881 } else if (Subtarget.is64Bit()
1882 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1883 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
1884 if (Subtarget.is64Bit()) {
1885 switch (SourceVT) {
1886 case MVT::i8:
1887 Opcode = NVPTX::ST_i8_ari_64;
1888 break;
1889 case MVT::i16:
1890 Opcode = NVPTX::ST_i16_ari_64;
1891 break;
1892 case MVT::i32:
1893 Opcode = NVPTX::ST_i32_ari_64;
1894 break;
1895 case MVT::i64:
1896 Opcode = NVPTX::ST_i64_ari_64;
1897 break;
1898 case MVT::f32:
1899 Opcode = NVPTX::ST_f32_ari_64;
1900 break;
1901 case MVT::f64:
1902 Opcode = NVPTX::ST_f64_ari_64;
1903 break;
1904 default:
1905 return nullptr;
1906 }
1907 } else {
1908 switch (SourceVT) {
1909 case MVT::i8:
1910 Opcode = NVPTX::ST_i8_ari;
1911 break;
1912 case MVT::i16:
1913 Opcode = NVPTX::ST_i16_ari;
1914 break;
1915 case MVT::i32:
1916 Opcode = NVPTX::ST_i32_ari;
1917 break;
1918 case MVT::i64:
1919 Opcode = NVPTX::ST_i64_ari;
1920 break;
1921 case MVT::f32:
1922 Opcode = NVPTX::ST_f32_ari;
1923 break;
1924 case MVT::f64:
1925 Opcode = NVPTX::ST_f64_ari;
1926 break;
1927 default:
1928 return nullptr;
1929 }
1930 }
1931 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1932 getI32Imm(vecType), getI32Imm(toType),
1933 getI32Imm(toTypeWidth), Base, Offset, Chain };
1934 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
1935 } else {
1936 if (Subtarget.is64Bit()) {
1937 switch (SourceVT) {
1938 case MVT::i8:
1939 Opcode = NVPTX::ST_i8_areg_64;
1940 break;
1941 case MVT::i16:
1942 Opcode = NVPTX::ST_i16_areg_64;
1943 break;
1944 case MVT::i32:
1945 Opcode = NVPTX::ST_i32_areg_64;
1946 break;
1947 case MVT::i64:
1948 Opcode = NVPTX::ST_i64_areg_64;
1949 break;
1950 case MVT::f32:
1951 Opcode = NVPTX::ST_f32_areg_64;
1952 break;
1953 case MVT::f64:
1954 Opcode = NVPTX::ST_f64_areg_64;
1955 break;
1956 default:
1957 return nullptr;
1958 }
1959 } else {
1960 switch (SourceVT) {
1961 case MVT::i8:
1962 Opcode = NVPTX::ST_i8_areg;
1963 break;
1964 case MVT::i16:
1965 Opcode = NVPTX::ST_i16_areg;
1966 break;
1967 case MVT::i32:
1968 Opcode = NVPTX::ST_i32_areg;
1969 break;
1970 case MVT::i64:
1971 Opcode = NVPTX::ST_i64_areg;
1972 break;
1973 case MVT::f32:
1974 Opcode = NVPTX::ST_f32_areg;
1975 break;
1976 case MVT::f64:
1977 Opcode = NVPTX::ST_f64_areg;
1978 break;
1979 default:
1980 return nullptr;
1981 }
1982 }
1983 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1984 getI32Imm(vecType), getI32Imm(toType),
1985 getI32Imm(toTypeWidth), N2, Chain };
1986 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
1987 }
1988
1989 if (NVPTXST) {
1990 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1991 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1992 cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
1993 }
1994
1995 return NVPTXST;
1996 }
1997
SelectStoreVector(SDNode * N)1998 SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
1999 SDValue Chain = N->getOperand(0);
2000 SDValue Op1 = N->getOperand(1);
2001 SDValue Addr, Offset, Base;
2002 unsigned Opcode;
2003 SDLoc DL(N);
2004 SDNode *ST;
2005 EVT EltVT = Op1.getValueType();
2006 MemSDNode *MemSD = cast<MemSDNode>(N);
2007 EVT StoreVT = MemSD->getMemoryVT();
2008
2009 // Address Space Setting
2010 unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
2011
2012 if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
2013 report_fatal_error("Cannot store to pointer that points to constant "
2014 "memory space");
2015 }
2016
2017 // Volatile Setting
2018 // - .volatile is only availalble for .global and .shared
2019 bool IsVolatile = MemSD->isVolatile();
2020 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2021 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2022 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2023 IsVolatile = false;
2024
2025 // Type Setting: toType + toTypeWidth
2026 // - for integer type, always use 'u'
2027 assert(StoreVT.isSimple() && "Store value is not simple");
2028 MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
2029 unsigned ToTypeWidth = ScalarVT.getSizeInBits();
2030 unsigned ToType;
2031 if (ScalarVT.isFloatingPoint())
2032 ToType = NVPTX::PTXLdStInstCode::Float;
2033 else
2034 ToType = NVPTX::PTXLdStInstCode::Unsigned;
2035
2036 SmallVector<SDValue, 12> StOps;
2037 SDValue N2;
2038 unsigned VecType;
2039
2040 switch (N->getOpcode()) {
2041 case NVPTXISD::StoreV2:
2042 VecType = NVPTX::PTXLdStInstCode::V2;
2043 StOps.push_back(N->getOperand(1));
2044 StOps.push_back(N->getOperand(2));
2045 N2 = N->getOperand(3);
2046 break;
2047 case NVPTXISD::StoreV4:
2048 VecType = NVPTX::PTXLdStInstCode::V4;
2049 StOps.push_back(N->getOperand(1));
2050 StOps.push_back(N->getOperand(2));
2051 StOps.push_back(N->getOperand(3));
2052 StOps.push_back(N->getOperand(4));
2053 N2 = N->getOperand(5);
2054 break;
2055 default:
2056 return nullptr;
2057 }
2058
2059 StOps.push_back(getI32Imm(IsVolatile));
2060 StOps.push_back(getI32Imm(CodeAddrSpace));
2061 StOps.push_back(getI32Imm(VecType));
2062 StOps.push_back(getI32Imm(ToType));
2063 StOps.push_back(getI32Imm(ToTypeWidth));
2064
2065 if (SelectDirectAddr(N2, Addr)) {
2066 switch (N->getOpcode()) {
2067 default:
2068 return nullptr;
2069 case NVPTXISD::StoreV2:
2070 switch (EltVT.getSimpleVT().SimpleTy) {
2071 default:
2072 return nullptr;
2073 case MVT::i8:
2074 Opcode = NVPTX::STV_i8_v2_avar;
2075 break;
2076 case MVT::i16:
2077 Opcode = NVPTX::STV_i16_v2_avar;
2078 break;
2079 case MVT::i32:
2080 Opcode = NVPTX::STV_i32_v2_avar;
2081 break;
2082 case MVT::i64:
2083 Opcode = NVPTX::STV_i64_v2_avar;
2084 break;
2085 case MVT::f32:
2086 Opcode = NVPTX::STV_f32_v2_avar;
2087 break;
2088 case MVT::f64:
2089 Opcode = NVPTX::STV_f64_v2_avar;
2090 break;
2091 }
2092 break;
2093 case NVPTXISD::StoreV4:
2094 switch (EltVT.getSimpleVT().SimpleTy) {
2095 default:
2096 return nullptr;
2097 case MVT::i8:
2098 Opcode = NVPTX::STV_i8_v4_avar;
2099 break;
2100 case MVT::i16:
2101 Opcode = NVPTX::STV_i16_v4_avar;
2102 break;
2103 case MVT::i32:
2104 Opcode = NVPTX::STV_i32_v4_avar;
2105 break;
2106 case MVT::f32:
2107 Opcode = NVPTX::STV_f32_v4_avar;
2108 break;
2109 }
2110 break;
2111 }
2112 StOps.push_back(Addr);
2113 } else if (Subtarget.is64Bit()
2114 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2115 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
2116 switch (N->getOpcode()) {
2117 default:
2118 return nullptr;
2119 case NVPTXISD::StoreV2:
2120 switch (EltVT.getSimpleVT().SimpleTy) {
2121 default:
2122 return nullptr;
2123 case MVT::i8:
2124 Opcode = NVPTX::STV_i8_v2_asi;
2125 break;
2126 case MVT::i16:
2127 Opcode = NVPTX::STV_i16_v2_asi;
2128 break;
2129 case MVT::i32:
2130 Opcode = NVPTX::STV_i32_v2_asi;
2131 break;
2132 case MVT::i64:
2133 Opcode = NVPTX::STV_i64_v2_asi;
2134 break;
2135 case MVT::f32:
2136 Opcode = NVPTX::STV_f32_v2_asi;
2137 break;
2138 case MVT::f64:
2139 Opcode = NVPTX::STV_f64_v2_asi;
2140 break;
2141 }
2142 break;
2143 case NVPTXISD::StoreV4:
2144 switch (EltVT.getSimpleVT().SimpleTy) {
2145 default:
2146 return nullptr;
2147 case MVT::i8:
2148 Opcode = NVPTX::STV_i8_v4_asi;
2149 break;
2150 case MVT::i16:
2151 Opcode = NVPTX::STV_i16_v4_asi;
2152 break;
2153 case MVT::i32:
2154 Opcode = NVPTX::STV_i32_v4_asi;
2155 break;
2156 case MVT::f32:
2157 Opcode = NVPTX::STV_f32_v4_asi;
2158 break;
2159 }
2160 break;
2161 }
2162 StOps.push_back(Base);
2163 StOps.push_back(Offset);
2164 } else if (Subtarget.is64Bit()
2165 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2166 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2167 if (Subtarget.is64Bit()) {
2168 switch (N->getOpcode()) {
2169 default:
2170 return nullptr;
2171 case NVPTXISD::StoreV2:
2172 switch (EltVT.getSimpleVT().SimpleTy) {
2173 default:
2174 return nullptr;
2175 case MVT::i8:
2176 Opcode = NVPTX::STV_i8_v2_ari_64;
2177 break;
2178 case MVT::i16:
2179 Opcode = NVPTX::STV_i16_v2_ari_64;
2180 break;
2181 case MVT::i32:
2182 Opcode = NVPTX::STV_i32_v2_ari_64;
2183 break;
2184 case MVT::i64:
2185 Opcode = NVPTX::STV_i64_v2_ari_64;
2186 break;
2187 case MVT::f32:
2188 Opcode = NVPTX::STV_f32_v2_ari_64;
2189 break;
2190 case MVT::f64:
2191 Opcode = NVPTX::STV_f64_v2_ari_64;
2192 break;
2193 }
2194 break;
2195 case NVPTXISD::StoreV4:
2196 switch (EltVT.getSimpleVT().SimpleTy) {
2197 default:
2198 return nullptr;
2199 case MVT::i8:
2200 Opcode = NVPTX::STV_i8_v4_ari_64;
2201 break;
2202 case MVT::i16:
2203 Opcode = NVPTX::STV_i16_v4_ari_64;
2204 break;
2205 case MVT::i32:
2206 Opcode = NVPTX::STV_i32_v4_ari_64;
2207 break;
2208 case MVT::f32:
2209 Opcode = NVPTX::STV_f32_v4_ari_64;
2210 break;
2211 }
2212 break;
2213 }
2214 } else {
2215 switch (N->getOpcode()) {
2216 default:
2217 return nullptr;
2218 case NVPTXISD::StoreV2:
2219 switch (EltVT.getSimpleVT().SimpleTy) {
2220 default:
2221 return nullptr;
2222 case MVT::i8:
2223 Opcode = NVPTX::STV_i8_v2_ari;
2224 break;
2225 case MVT::i16:
2226 Opcode = NVPTX::STV_i16_v2_ari;
2227 break;
2228 case MVT::i32:
2229 Opcode = NVPTX::STV_i32_v2_ari;
2230 break;
2231 case MVT::i64:
2232 Opcode = NVPTX::STV_i64_v2_ari;
2233 break;
2234 case MVT::f32:
2235 Opcode = NVPTX::STV_f32_v2_ari;
2236 break;
2237 case MVT::f64:
2238 Opcode = NVPTX::STV_f64_v2_ari;
2239 break;
2240 }
2241 break;
2242 case NVPTXISD::StoreV4:
2243 switch (EltVT.getSimpleVT().SimpleTy) {
2244 default:
2245 return nullptr;
2246 case MVT::i8:
2247 Opcode = NVPTX::STV_i8_v4_ari;
2248 break;
2249 case MVT::i16:
2250 Opcode = NVPTX::STV_i16_v4_ari;
2251 break;
2252 case MVT::i32:
2253 Opcode = NVPTX::STV_i32_v4_ari;
2254 break;
2255 case MVT::f32:
2256 Opcode = NVPTX::STV_f32_v4_ari;
2257 break;
2258 }
2259 break;
2260 }
2261 }
2262 StOps.push_back(Base);
2263 StOps.push_back(Offset);
2264 } else {
2265 if (Subtarget.is64Bit()) {
2266 switch (N->getOpcode()) {
2267 default:
2268 return nullptr;
2269 case NVPTXISD::StoreV2:
2270 switch (EltVT.getSimpleVT().SimpleTy) {
2271 default:
2272 return nullptr;
2273 case MVT::i8:
2274 Opcode = NVPTX::STV_i8_v2_areg_64;
2275 break;
2276 case MVT::i16:
2277 Opcode = NVPTX::STV_i16_v2_areg_64;
2278 break;
2279 case MVT::i32:
2280 Opcode = NVPTX::STV_i32_v2_areg_64;
2281 break;
2282 case MVT::i64:
2283 Opcode = NVPTX::STV_i64_v2_areg_64;
2284 break;
2285 case MVT::f32:
2286 Opcode = NVPTX::STV_f32_v2_areg_64;
2287 break;
2288 case MVT::f64:
2289 Opcode = NVPTX::STV_f64_v2_areg_64;
2290 break;
2291 }
2292 break;
2293 case NVPTXISD::StoreV4:
2294 switch (EltVT.getSimpleVT().SimpleTy) {
2295 default:
2296 return nullptr;
2297 case MVT::i8:
2298 Opcode = NVPTX::STV_i8_v4_areg_64;
2299 break;
2300 case MVT::i16:
2301 Opcode = NVPTX::STV_i16_v4_areg_64;
2302 break;
2303 case MVT::i32:
2304 Opcode = NVPTX::STV_i32_v4_areg_64;
2305 break;
2306 case MVT::f32:
2307 Opcode = NVPTX::STV_f32_v4_areg_64;
2308 break;
2309 }
2310 break;
2311 }
2312 } else {
2313 switch (N->getOpcode()) {
2314 default:
2315 return nullptr;
2316 case NVPTXISD::StoreV2:
2317 switch (EltVT.getSimpleVT().SimpleTy) {
2318 default:
2319 return nullptr;
2320 case MVT::i8:
2321 Opcode = NVPTX::STV_i8_v2_areg;
2322 break;
2323 case MVT::i16:
2324 Opcode = NVPTX::STV_i16_v2_areg;
2325 break;
2326 case MVT::i32:
2327 Opcode = NVPTX::STV_i32_v2_areg;
2328 break;
2329 case MVT::i64:
2330 Opcode = NVPTX::STV_i64_v2_areg;
2331 break;
2332 case MVT::f32:
2333 Opcode = NVPTX::STV_f32_v2_areg;
2334 break;
2335 case MVT::f64:
2336 Opcode = NVPTX::STV_f64_v2_areg;
2337 break;
2338 }
2339 break;
2340 case NVPTXISD::StoreV4:
2341 switch (EltVT.getSimpleVT().SimpleTy) {
2342 default:
2343 return nullptr;
2344 case MVT::i8:
2345 Opcode = NVPTX::STV_i8_v4_areg;
2346 break;
2347 case MVT::i16:
2348 Opcode = NVPTX::STV_i16_v4_areg;
2349 break;
2350 case MVT::i32:
2351 Opcode = NVPTX::STV_i32_v4_areg;
2352 break;
2353 case MVT::f32:
2354 Opcode = NVPTX::STV_f32_v4_areg;
2355 break;
2356 }
2357 break;
2358 }
2359 }
2360 StOps.push_back(N2);
2361 }
2362
2363 StOps.push_back(Chain);
2364
2365 ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
2366
2367 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2368 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2369 cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
2370
2371 return ST;
2372 }
2373
SelectLoadParam(SDNode * Node)2374 SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
2375 SDValue Chain = Node->getOperand(0);
2376 SDValue Offset = Node->getOperand(2);
2377 SDValue Flag = Node->getOperand(3);
2378 SDLoc DL(Node);
2379 MemSDNode *Mem = cast<MemSDNode>(Node);
2380
2381 unsigned VecSize;
2382 switch (Node->getOpcode()) {
2383 default:
2384 return nullptr;
2385 case NVPTXISD::LoadParam:
2386 VecSize = 1;
2387 break;
2388 case NVPTXISD::LoadParamV2:
2389 VecSize = 2;
2390 break;
2391 case NVPTXISD::LoadParamV4:
2392 VecSize = 4;
2393 break;
2394 }
2395
2396 EVT EltVT = Node->getValueType(0);
2397 EVT MemVT = Mem->getMemoryVT();
2398
2399 unsigned Opc = 0;
2400
2401 switch (VecSize) {
2402 default:
2403 return nullptr;
2404 case 1:
2405 switch (MemVT.getSimpleVT().SimpleTy) {
2406 default:
2407 return nullptr;
2408 case MVT::i1:
2409 Opc = NVPTX::LoadParamMemI8;
2410 break;
2411 case MVT::i8:
2412 Opc = NVPTX::LoadParamMemI8;
2413 break;
2414 case MVT::i16:
2415 Opc = NVPTX::LoadParamMemI16;
2416 break;
2417 case MVT::i32:
2418 Opc = NVPTX::LoadParamMemI32;
2419 break;
2420 case MVT::i64:
2421 Opc = NVPTX::LoadParamMemI64;
2422 break;
2423 case MVT::f32:
2424 Opc = NVPTX::LoadParamMemF32;
2425 break;
2426 case MVT::f64:
2427 Opc = NVPTX::LoadParamMemF64;
2428 break;
2429 }
2430 break;
2431 case 2:
2432 switch (MemVT.getSimpleVT().SimpleTy) {
2433 default:
2434 return nullptr;
2435 case MVT::i1:
2436 Opc = NVPTX::LoadParamMemV2I8;
2437 break;
2438 case MVT::i8:
2439 Opc = NVPTX::LoadParamMemV2I8;
2440 break;
2441 case MVT::i16:
2442 Opc = NVPTX::LoadParamMemV2I16;
2443 break;
2444 case MVT::i32:
2445 Opc = NVPTX::LoadParamMemV2I32;
2446 break;
2447 case MVT::i64:
2448 Opc = NVPTX::LoadParamMemV2I64;
2449 break;
2450 case MVT::f32:
2451 Opc = NVPTX::LoadParamMemV2F32;
2452 break;
2453 case MVT::f64:
2454 Opc = NVPTX::LoadParamMemV2F64;
2455 break;
2456 }
2457 break;
2458 case 4:
2459 switch (MemVT.getSimpleVT().SimpleTy) {
2460 default:
2461 return nullptr;
2462 case MVT::i1:
2463 Opc = NVPTX::LoadParamMemV4I8;
2464 break;
2465 case MVT::i8:
2466 Opc = NVPTX::LoadParamMemV4I8;
2467 break;
2468 case MVT::i16:
2469 Opc = NVPTX::LoadParamMemV4I16;
2470 break;
2471 case MVT::i32:
2472 Opc = NVPTX::LoadParamMemV4I32;
2473 break;
2474 case MVT::f32:
2475 Opc = NVPTX::LoadParamMemV4F32;
2476 break;
2477 }
2478 break;
2479 }
2480
2481 SDVTList VTs;
2482 if (VecSize == 1) {
2483 VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2484 } else if (VecSize == 2) {
2485 VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2486 } else {
2487 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
2488 VTs = CurDAG->getVTList(EVTs);
2489 }
2490
2491 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2492
2493 SmallVector<SDValue, 2> Ops;
2494 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2495 Ops.push_back(Chain);
2496 Ops.push_back(Flag);
2497
2498 SDNode *Ret =
2499 CurDAG->getMachineNode(Opc, DL, VTs, Ops);
2500 return Ret;
2501 }
2502
SelectStoreRetval(SDNode * N)2503 SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
2504 SDLoc DL(N);
2505 SDValue Chain = N->getOperand(0);
2506 SDValue Offset = N->getOperand(1);
2507 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2508 MemSDNode *Mem = cast<MemSDNode>(N);
2509
2510 // How many elements do we have?
2511 unsigned NumElts = 1;
2512 switch (N->getOpcode()) {
2513 default:
2514 return nullptr;
2515 case NVPTXISD::StoreRetval:
2516 NumElts = 1;
2517 break;
2518 case NVPTXISD::StoreRetvalV2:
2519 NumElts = 2;
2520 break;
2521 case NVPTXISD::StoreRetvalV4:
2522 NumElts = 4;
2523 break;
2524 }
2525
2526 // Build vector of operands
2527 SmallVector<SDValue, 6> Ops;
2528 for (unsigned i = 0; i < NumElts; ++i)
2529 Ops.push_back(N->getOperand(i + 2));
2530 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2531 Ops.push_back(Chain);
2532
2533 // Determine target opcode
2534 // If we have an i1, use an 8-bit store. The lowering code in
2535 // NVPTXISelLowering will have already emitted an upcast.
2536 unsigned Opcode = 0;
2537 switch (NumElts) {
2538 default:
2539 return nullptr;
2540 case 1:
2541 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2542 default:
2543 return nullptr;
2544 case MVT::i1:
2545 Opcode = NVPTX::StoreRetvalI8;
2546 break;
2547 case MVT::i8:
2548 Opcode = NVPTX::StoreRetvalI8;
2549 break;
2550 case MVT::i16:
2551 Opcode = NVPTX::StoreRetvalI16;
2552 break;
2553 case MVT::i32:
2554 Opcode = NVPTX::StoreRetvalI32;
2555 break;
2556 case MVT::i64:
2557 Opcode = NVPTX::StoreRetvalI64;
2558 break;
2559 case MVT::f32:
2560 Opcode = NVPTX::StoreRetvalF32;
2561 break;
2562 case MVT::f64:
2563 Opcode = NVPTX::StoreRetvalF64;
2564 break;
2565 }
2566 break;
2567 case 2:
2568 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2569 default:
2570 return nullptr;
2571 case MVT::i1:
2572 Opcode = NVPTX::StoreRetvalV2I8;
2573 break;
2574 case MVT::i8:
2575 Opcode = NVPTX::StoreRetvalV2I8;
2576 break;
2577 case MVT::i16:
2578 Opcode = NVPTX::StoreRetvalV2I16;
2579 break;
2580 case MVT::i32:
2581 Opcode = NVPTX::StoreRetvalV2I32;
2582 break;
2583 case MVT::i64:
2584 Opcode = NVPTX::StoreRetvalV2I64;
2585 break;
2586 case MVT::f32:
2587 Opcode = NVPTX::StoreRetvalV2F32;
2588 break;
2589 case MVT::f64:
2590 Opcode = NVPTX::StoreRetvalV2F64;
2591 break;
2592 }
2593 break;
2594 case 4:
2595 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2596 default:
2597 return nullptr;
2598 case MVT::i1:
2599 Opcode = NVPTX::StoreRetvalV4I8;
2600 break;
2601 case MVT::i8:
2602 Opcode = NVPTX::StoreRetvalV4I8;
2603 break;
2604 case MVT::i16:
2605 Opcode = NVPTX::StoreRetvalV4I16;
2606 break;
2607 case MVT::i32:
2608 Opcode = NVPTX::StoreRetvalV4I32;
2609 break;
2610 case MVT::f32:
2611 Opcode = NVPTX::StoreRetvalV4F32;
2612 break;
2613 }
2614 break;
2615 }
2616
2617 SDNode *Ret =
2618 CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
2619 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2620 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2621 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2622
2623 return Ret;
2624 }
2625
SelectStoreParam(SDNode * N)2626 SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
2627 SDLoc DL(N);
2628 SDValue Chain = N->getOperand(0);
2629 SDValue Param = N->getOperand(1);
2630 unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2631 SDValue Offset = N->getOperand(2);
2632 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2633 MemSDNode *Mem = cast<MemSDNode>(N);
2634 SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2635
2636 // How many elements do we have?
2637 unsigned NumElts = 1;
2638 switch (N->getOpcode()) {
2639 default:
2640 return nullptr;
2641 case NVPTXISD::StoreParamU32:
2642 case NVPTXISD::StoreParamS32:
2643 case NVPTXISD::StoreParam:
2644 NumElts = 1;
2645 break;
2646 case NVPTXISD::StoreParamV2:
2647 NumElts = 2;
2648 break;
2649 case NVPTXISD::StoreParamV4:
2650 NumElts = 4;
2651 break;
2652 }
2653
2654 // Build vector of operands
2655 SmallVector<SDValue, 8> Ops;
2656 for (unsigned i = 0; i < NumElts; ++i)
2657 Ops.push_back(N->getOperand(i + 3));
2658 Ops.push_back(CurDAG->getTargetConstant(ParamVal, MVT::i32));
2659 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2660 Ops.push_back(Chain);
2661 Ops.push_back(Flag);
2662
2663 // Determine target opcode
2664 // If we have an i1, use an 8-bit store. The lowering code in
2665 // NVPTXISelLowering will have already emitted an upcast.
2666 unsigned Opcode = 0;
2667 switch (N->getOpcode()) {
2668 default:
2669 switch (NumElts) {
2670 default:
2671 return nullptr;
2672 case 1:
2673 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2674 default:
2675 return nullptr;
2676 case MVT::i1:
2677 Opcode = NVPTX::StoreParamI8;
2678 break;
2679 case MVT::i8:
2680 Opcode = NVPTX::StoreParamI8;
2681 break;
2682 case MVT::i16:
2683 Opcode = NVPTX::StoreParamI16;
2684 break;
2685 case MVT::i32:
2686 Opcode = NVPTX::StoreParamI32;
2687 break;
2688 case MVT::i64:
2689 Opcode = NVPTX::StoreParamI64;
2690 break;
2691 case MVT::f32:
2692 Opcode = NVPTX::StoreParamF32;
2693 break;
2694 case MVT::f64:
2695 Opcode = NVPTX::StoreParamF64;
2696 break;
2697 }
2698 break;
2699 case 2:
2700 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2701 default:
2702 return nullptr;
2703 case MVT::i1:
2704 Opcode = NVPTX::StoreParamV2I8;
2705 break;
2706 case MVT::i8:
2707 Opcode = NVPTX::StoreParamV2I8;
2708 break;
2709 case MVT::i16:
2710 Opcode = NVPTX::StoreParamV2I16;
2711 break;
2712 case MVT::i32:
2713 Opcode = NVPTX::StoreParamV2I32;
2714 break;
2715 case MVT::i64:
2716 Opcode = NVPTX::StoreParamV2I64;
2717 break;
2718 case MVT::f32:
2719 Opcode = NVPTX::StoreParamV2F32;
2720 break;
2721 case MVT::f64:
2722 Opcode = NVPTX::StoreParamV2F64;
2723 break;
2724 }
2725 break;
2726 case 4:
2727 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2728 default:
2729 return nullptr;
2730 case MVT::i1:
2731 Opcode = NVPTX::StoreParamV4I8;
2732 break;
2733 case MVT::i8:
2734 Opcode = NVPTX::StoreParamV4I8;
2735 break;
2736 case MVT::i16:
2737 Opcode = NVPTX::StoreParamV4I16;
2738 break;
2739 case MVT::i32:
2740 Opcode = NVPTX::StoreParamV4I32;
2741 break;
2742 case MVT::f32:
2743 Opcode = NVPTX::StoreParamV4F32;
2744 break;
2745 }
2746 break;
2747 }
2748 break;
2749 // Special case: if we have a sign-extend/zero-extend node, insert the
2750 // conversion instruction first, and use that as the value operand to
2751 // the selected StoreParam node.
2752 case NVPTXISD::StoreParamU32: {
2753 Opcode = NVPTX::StoreParamI32;
2754 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2755 MVT::i32);
2756 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
2757 MVT::i32, Ops[0], CvtNone);
2758 Ops[0] = SDValue(Cvt, 0);
2759 break;
2760 }
2761 case NVPTXISD::StoreParamS32: {
2762 Opcode = NVPTX::StoreParamI32;
2763 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2764 MVT::i32);
2765 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
2766 MVT::i32, Ops[0], CvtNone);
2767 Ops[0] = SDValue(Cvt, 0);
2768 break;
2769 }
2770 }
2771
2772 SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
2773 SDNode *Ret =
2774 CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
2775 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2776 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2777 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2778
2779 return Ret;
2780 }
2781
SelectTextureIntrinsic(SDNode * N)2782 SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) {
2783 SDValue Chain = N->getOperand(0);
2784 SDValue TexRef = N->getOperand(1);
2785 SDValue SampRef = N->getOperand(2);
2786 SDNode *Ret = nullptr;
2787 unsigned Opc = 0;
2788 SmallVector<SDValue, 8> Ops;
2789
2790 switch (N->getOpcode()) {
2791 default: return nullptr;
2792 case NVPTXISD::Tex1DFloatI32:
2793 Opc = NVPTX::TEX_1D_F32_I32;
2794 break;
2795 case NVPTXISD::Tex1DFloatFloat:
2796 Opc = NVPTX::TEX_1D_F32_F32;
2797 break;
2798 case NVPTXISD::Tex1DFloatFloatLevel:
2799 Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
2800 break;
2801 case NVPTXISD::Tex1DFloatFloatGrad:
2802 Opc = NVPTX::TEX_1D_F32_F32_GRAD;
2803 break;
2804 case NVPTXISD::Tex1DI32I32:
2805 Opc = NVPTX::TEX_1D_I32_I32;
2806 break;
2807 case NVPTXISD::Tex1DI32Float:
2808 Opc = NVPTX::TEX_1D_I32_F32;
2809 break;
2810 case NVPTXISD::Tex1DI32FloatLevel:
2811 Opc = NVPTX::TEX_1D_I32_F32_LEVEL;
2812 break;
2813 case NVPTXISD::Tex1DI32FloatGrad:
2814 Opc = NVPTX::TEX_1D_I32_F32_GRAD;
2815 break;
2816 case NVPTXISD::Tex1DArrayFloatI32:
2817 Opc = NVPTX::TEX_1D_ARRAY_F32_I32;
2818 break;
2819 case NVPTXISD::Tex1DArrayFloatFloat:
2820 Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
2821 break;
2822 case NVPTXISD::Tex1DArrayFloatFloatLevel:
2823 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
2824 break;
2825 case NVPTXISD::Tex1DArrayFloatFloatGrad:
2826 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
2827 break;
2828 case NVPTXISD::Tex1DArrayI32I32:
2829 Opc = NVPTX::TEX_1D_ARRAY_I32_I32;
2830 break;
2831 case NVPTXISD::Tex1DArrayI32Float:
2832 Opc = NVPTX::TEX_1D_ARRAY_I32_F32;
2833 break;
2834 case NVPTXISD::Tex1DArrayI32FloatLevel:
2835 Opc = NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL;
2836 break;
2837 case NVPTXISD::Tex1DArrayI32FloatGrad:
2838 Opc = NVPTX::TEX_1D_ARRAY_I32_F32_GRAD;
2839 break;
2840 case NVPTXISD::Tex2DFloatI32:
2841 Opc = NVPTX::TEX_2D_F32_I32;
2842 break;
2843 case NVPTXISD::Tex2DFloatFloat:
2844 Opc = NVPTX::TEX_2D_F32_F32;
2845 break;
2846 case NVPTXISD::Tex2DFloatFloatLevel:
2847 Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
2848 break;
2849 case NVPTXISD::Tex2DFloatFloatGrad:
2850 Opc = NVPTX::TEX_2D_F32_F32_GRAD;
2851 break;
2852 case NVPTXISD::Tex2DI32I32:
2853 Opc = NVPTX::TEX_2D_I32_I32;
2854 break;
2855 case NVPTXISD::Tex2DI32Float:
2856 Opc = NVPTX::TEX_2D_I32_F32;
2857 break;
2858 case NVPTXISD::Tex2DI32FloatLevel:
2859 Opc = NVPTX::TEX_2D_I32_F32_LEVEL;
2860 break;
2861 case NVPTXISD::Tex2DI32FloatGrad:
2862 Opc = NVPTX::TEX_2D_I32_F32_GRAD;
2863 break;
2864 case NVPTXISD::Tex2DArrayFloatI32:
2865 Opc = NVPTX::TEX_2D_ARRAY_F32_I32;
2866 break;
2867 case NVPTXISD::Tex2DArrayFloatFloat:
2868 Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
2869 break;
2870 case NVPTXISD::Tex2DArrayFloatFloatLevel:
2871 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
2872 break;
2873 case NVPTXISD::Tex2DArrayFloatFloatGrad:
2874 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
2875 break;
2876 case NVPTXISD::Tex2DArrayI32I32:
2877 Opc = NVPTX::TEX_2D_ARRAY_I32_I32;
2878 break;
2879 case NVPTXISD::Tex2DArrayI32Float:
2880 Opc = NVPTX::TEX_2D_ARRAY_I32_F32;
2881 break;
2882 case NVPTXISD::Tex2DArrayI32FloatLevel:
2883 Opc = NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL;
2884 break;
2885 case NVPTXISD::Tex2DArrayI32FloatGrad:
2886 Opc = NVPTX::TEX_2D_ARRAY_I32_F32_GRAD;
2887 break;
2888 case NVPTXISD::Tex3DFloatI32:
2889 Opc = NVPTX::TEX_3D_F32_I32;
2890 break;
2891 case NVPTXISD::Tex3DFloatFloat:
2892 Opc = NVPTX::TEX_3D_F32_F32;
2893 break;
2894 case NVPTXISD::Tex3DFloatFloatLevel:
2895 Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
2896 break;
2897 case NVPTXISD::Tex3DFloatFloatGrad:
2898 Opc = NVPTX::TEX_3D_F32_F32_GRAD;
2899 break;
2900 case NVPTXISD::Tex3DI32I32:
2901 Opc = NVPTX::TEX_3D_I32_I32;
2902 break;
2903 case NVPTXISD::Tex3DI32Float:
2904 Opc = NVPTX::TEX_3D_I32_F32;
2905 break;
2906 case NVPTXISD::Tex3DI32FloatLevel:
2907 Opc = NVPTX::TEX_3D_I32_F32_LEVEL;
2908 break;
2909 case NVPTXISD::Tex3DI32FloatGrad:
2910 Opc = NVPTX::TEX_3D_I32_F32_GRAD;
2911 break;
2912 }
2913
2914 Ops.push_back(TexRef);
2915 Ops.push_back(SampRef);
2916
2917 // Copy over indices
2918 for (unsigned i = 3; i < N->getNumOperands(); ++i) {
2919 Ops.push_back(N->getOperand(i));
2920 }
2921
2922 Ops.push_back(Chain);
2923 Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
2924 return Ret;
2925 }
2926
SelectSurfaceIntrinsic(SDNode * N)2927 SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
2928 SDValue Chain = N->getOperand(0);
2929 SDValue TexHandle = N->getOperand(1);
2930 SDNode *Ret = nullptr;
2931 unsigned Opc = 0;
2932 SmallVector<SDValue, 8> Ops;
2933 switch (N->getOpcode()) {
2934 default: return nullptr;
2935 case NVPTXISD::Suld1DI8Trap:
2936 Opc = NVPTX::SULD_1D_I8_TRAP;
2937 Ops.push_back(TexHandle);
2938 Ops.push_back(N->getOperand(2));
2939 Ops.push_back(Chain);
2940 break;
2941 case NVPTXISD::Suld1DI16Trap:
2942 Opc = NVPTX::SULD_1D_I16_TRAP;
2943 Ops.push_back(TexHandle);
2944 Ops.push_back(N->getOperand(2));
2945 Ops.push_back(Chain);
2946 break;
2947 case NVPTXISD::Suld1DI32Trap:
2948 Opc = NVPTX::SULD_1D_I32_TRAP;
2949 Ops.push_back(TexHandle);
2950 Ops.push_back(N->getOperand(2));
2951 Ops.push_back(Chain);
2952 break;
2953 case NVPTXISD::Suld1DV2I8Trap:
2954 Opc = NVPTX::SULD_1D_V2I8_TRAP;
2955 Ops.push_back(TexHandle);
2956 Ops.push_back(N->getOperand(2));
2957 Ops.push_back(Chain);
2958 break;
2959 case NVPTXISD::Suld1DV2I16Trap:
2960 Opc = NVPTX::SULD_1D_V2I16_TRAP;
2961 Ops.push_back(TexHandle);
2962 Ops.push_back(N->getOperand(2));
2963 Ops.push_back(Chain);
2964 break;
2965 case NVPTXISD::Suld1DV2I32Trap:
2966 Opc = NVPTX::SULD_1D_V2I32_TRAP;
2967 Ops.push_back(TexHandle);
2968 Ops.push_back(N->getOperand(2));
2969 Ops.push_back(Chain);
2970 break;
2971 case NVPTXISD::Suld1DV4I8Trap:
2972 Opc = NVPTX::SULD_1D_V4I8_TRAP;
2973 Ops.push_back(TexHandle);
2974 Ops.push_back(N->getOperand(2));
2975 Ops.push_back(Chain);
2976 break;
2977 case NVPTXISD::Suld1DV4I16Trap:
2978 Opc = NVPTX::SULD_1D_V4I16_TRAP;
2979 Ops.push_back(TexHandle);
2980 Ops.push_back(N->getOperand(2));
2981 Ops.push_back(Chain);
2982 break;
2983 case NVPTXISD::Suld1DV4I32Trap:
2984 Opc = NVPTX::SULD_1D_V4I32_TRAP;
2985 Ops.push_back(TexHandle);
2986 Ops.push_back(N->getOperand(2));
2987 Ops.push_back(Chain);
2988 break;
2989 case NVPTXISD::Suld1DArrayI8Trap:
2990 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
2991 Ops.push_back(TexHandle);
2992 Ops.push_back(N->getOperand(2));
2993 Ops.push_back(N->getOperand(3));
2994 Ops.push_back(Chain);
2995 break;
2996 case NVPTXISD::Suld1DArrayI16Trap:
2997 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
2998 Ops.push_back(TexHandle);
2999 Ops.push_back(N->getOperand(2));
3000 Ops.push_back(N->getOperand(3));
3001 Ops.push_back(Chain);
3002 break;
3003 case NVPTXISD::Suld1DArrayI32Trap:
3004 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
3005 Ops.push_back(TexHandle);
3006 Ops.push_back(N->getOperand(2));
3007 Ops.push_back(N->getOperand(3));
3008 Ops.push_back(Chain);
3009 break;
3010 case NVPTXISD::Suld1DArrayV2I8Trap:
3011 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
3012 Ops.push_back(TexHandle);
3013 Ops.push_back(N->getOperand(2));
3014 Ops.push_back(N->getOperand(3));
3015 Ops.push_back(Chain);
3016 break;
3017 case NVPTXISD::Suld1DArrayV2I16Trap:
3018 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
3019 Ops.push_back(TexHandle);
3020 Ops.push_back(N->getOperand(2));
3021 Ops.push_back(N->getOperand(3));
3022 Ops.push_back(Chain);
3023 break;
3024 case NVPTXISD::Suld1DArrayV2I32Trap:
3025 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
3026 Ops.push_back(TexHandle);
3027 Ops.push_back(N->getOperand(2));
3028 Ops.push_back(N->getOperand(3));
3029 Ops.push_back(Chain);
3030 break;
3031 case NVPTXISD::Suld1DArrayV4I8Trap:
3032 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
3033 Ops.push_back(TexHandle);
3034 Ops.push_back(N->getOperand(2));
3035 Ops.push_back(N->getOperand(3));
3036 Ops.push_back(Chain);
3037 break;
3038 case NVPTXISD::Suld1DArrayV4I16Trap:
3039 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
3040 Ops.push_back(TexHandle);
3041 Ops.push_back(N->getOperand(2));
3042 Ops.push_back(N->getOperand(3));
3043 Ops.push_back(Chain);
3044 break;
3045 case NVPTXISD::Suld1DArrayV4I32Trap:
3046 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
3047 Ops.push_back(TexHandle);
3048 Ops.push_back(N->getOperand(2));
3049 Ops.push_back(N->getOperand(3));
3050 Ops.push_back(Chain);
3051 break;
3052 case NVPTXISD::Suld2DI8Trap:
3053 Opc = NVPTX::SULD_2D_I8_TRAP;
3054 Ops.push_back(TexHandle);
3055 Ops.push_back(N->getOperand(2));
3056 Ops.push_back(N->getOperand(3));
3057 Ops.push_back(Chain);
3058 break;
3059 case NVPTXISD::Suld2DI16Trap:
3060 Opc = NVPTX::SULD_2D_I16_TRAP;
3061 Ops.push_back(TexHandle);
3062 Ops.push_back(N->getOperand(2));
3063 Ops.push_back(N->getOperand(3));
3064 Ops.push_back(Chain);
3065 break;
3066 case NVPTXISD::Suld2DI32Trap:
3067 Opc = NVPTX::SULD_2D_I32_TRAP;
3068 Ops.push_back(TexHandle);
3069 Ops.push_back(N->getOperand(2));
3070 Ops.push_back(N->getOperand(3));
3071 Ops.push_back(Chain);
3072 break;
3073 case NVPTXISD::Suld2DV2I8Trap:
3074 Opc = NVPTX::SULD_2D_V2I8_TRAP;
3075 Ops.push_back(TexHandle);
3076 Ops.push_back(N->getOperand(2));
3077 Ops.push_back(N->getOperand(3));
3078 Ops.push_back(Chain);
3079 break;
3080 case NVPTXISD::Suld2DV2I16Trap:
3081 Opc = NVPTX::SULD_2D_V2I16_TRAP;
3082 Ops.push_back(TexHandle);
3083 Ops.push_back(N->getOperand(2));
3084 Ops.push_back(N->getOperand(3));
3085 Ops.push_back(Chain);
3086 break;
3087 case NVPTXISD::Suld2DV2I32Trap:
3088 Opc = NVPTX::SULD_2D_V2I32_TRAP;
3089 Ops.push_back(TexHandle);
3090 Ops.push_back(N->getOperand(2));
3091 Ops.push_back(N->getOperand(3));
3092 Ops.push_back(Chain);
3093 break;
3094 case NVPTXISD::Suld2DV4I8Trap:
3095 Opc = NVPTX::SULD_2D_V4I8_TRAP;
3096 Ops.push_back(TexHandle);
3097 Ops.push_back(N->getOperand(2));
3098 Ops.push_back(N->getOperand(3));
3099 Ops.push_back(Chain);
3100 break;
3101 case NVPTXISD::Suld2DV4I16Trap:
3102 Opc = NVPTX::SULD_2D_V4I16_TRAP;
3103 Ops.push_back(TexHandle);
3104 Ops.push_back(N->getOperand(2));
3105 Ops.push_back(N->getOperand(3));
3106 Ops.push_back(Chain);
3107 break;
3108 case NVPTXISD::Suld2DV4I32Trap:
3109 Opc = NVPTX::SULD_2D_V4I32_TRAP;
3110 Ops.push_back(TexHandle);
3111 Ops.push_back(N->getOperand(2));
3112 Ops.push_back(N->getOperand(3));
3113 Ops.push_back(Chain);
3114 break;
3115 case NVPTXISD::Suld2DArrayI8Trap:
3116 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
3117 Ops.push_back(TexHandle);
3118 Ops.push_back(N->getOperand(2));
3119 Ops.push_back(N->getOperand(3));
3120 Ops.push_back(N->getOperand(4));
3121 Ops.push_back(Chain);
3122 break;
3123 case NVPTXISD::Suld2DArrayI16Trap:
3124 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
3125 Ops.push_back(TexHandle);
3126 Ops.push_back(N->getOperand(2));
3127 Ops.push_back(N->getOperand(3));
3128 Ops.push_back(N->getOperand(4));
3129 Ops.push_back(Chain);
3130 break;
3131 case NVPTXISD::Suld2DArrayI32Trap:
3132 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
3133 Ops.push_back(TexHandle);
3134 Ops.push_back(N->getOperand(2));
3135 Ops.push_back(N->getOperand(3));
3136 Ops.push_back(N->getOperand(4));
3137 Ops.push_back(Chain);
3138 break;
3139 case NVPTXISD::Suld2DArrayV2I8Trap:
3140 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
3141 Ops.push_back(TexHandle);
3142 Ops.push_back(N->getOperand(2));
3143 Ops.push_back(N->getOperand(3));
3144 Ops.push_back(N->getOperand(4));
3145 Ops.push_back(Chain);
3146 break;
3147 case NVPTXISD::Suld2DArrayV2I16Trap:
3148 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
3149 Ops.push_back(TexHandle);
3150 Ops.push_back(N->getOperand(2));
3151 Ops.push_back(N->getOperand(3));
3152 Ops.push_back(N->getOperand(4));
3153 Ops.push_back(Chain);
3154 break;
3155 case NVPTXISD::Suld2DArrayV2I32Trap:
3156 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
3157 Ops.push_back(TexHandle);
3158 Ops.push_back(N->getOperand(2));
3159 Ops.push_back(N->getOperand(3));
3160 Ops.push_back(N->getOperand(4));
3161 Ops.push_back(Chain);
3162 break;
3163 case NVPTXISD::Suld2DArrayV4I8Trap:
3164 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
3165 Ops.push_back(TexHandle);
3166 Ops.push_back(N->getOperand(2));
3167 Ops.push_back(N->getOperand(3));
3168 Ops.push_back(N->getOperand(4));
3169 Ops.push_back(Chain);
3170 break;
3171 case NVPTXISD::Suld2DArrayV4I16Trap:
3172 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
3173 Ops.push_back(TexHandle);
3174 Ops.push_back(N->getOperand(2));
3175 Ops.push_back(N->getOperand(3));
3176 Ops.push_back(N->getOperand(4));
3177 Ops.push_back(Chain);
3178 break;
3179 case NVPTXISD::Suld2DArrayV4I32Trap:
3180 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
3181 Ops.push_back(TexHandle);
3182 Ops.push_back(N->getOperand(2));
3183 Ops.push_back(N->getOperand(3));
3184 Ops.push_back(N->getOperand(4));
3185 Ops.push_back(Chain);
3186 break;
3187 case NVPTXISD::Suld3DI8Trap:
3188 Opc = NVPTX::SULD_3D_I8_TRAP;
3189 Ops.push_back(TexHandle);
3190 Ops.push_back(N->getOperand(2));
3191 Ops.push_back(N->getOperand(3));
3192 Ops.push_back(N->getOperand(4));
3193 Ops.push_back(Chain);
3194 break;
3195 case NVPTXISD::Suld3DI16Trap:
3196 Opc = NVPTX::SULD_3D_I16_TRAP;
3197 Ops.push_back(TexHandle);
3198 Ops.push_back(N->getOperand(2));
3199 Ops.push_back(N->getOperand(3));
3200 Ops.push_back(N->getOperand(4));
3201 Ops.push_back(Chain);
3202 break;
3203 case NVPTXISD::Suld3DI32Trap:
3204 Opc = NVPTX::SULD_3D_I32_TRAP;
3205 Ops.push_back(TexHandle);
3206 Ops.push_back(N->getOperand(2));
3207 Ops.push_back(N->getOperand(3));
3208 Ops.push_back(N->getOperand(4));
3209 Ops.push_back(Chain);
3210 break;
3211 case NVPTXISD::Suld3DV2I8Trap:
3212 Opc = NVPTX::SULD_3D_V2I8_TRAP;
3213 Ops.push_back(TexHandle);
3214 Ops.push_back(N->getOperand(2));
3215 Ops.push_back(N->getOperand(3));
3216 Ops.push_back(N->getOperand(4));
3217 Ops.push_back(Chain);
3218 break;
3219 case NVPTXISD::Suld3DV2I16Trap:
3220 Opc = NVPTX::SULD_3D_V2I16_TRAP;
3221 Ops.push_back(TexHandle);
3222 Ops.push_back(N->getOperand(2));
3223 Ops.push_back(N->getOperand(3));
3224 Ops.push_back(N->getOperand(4));
3225 Ops.push_back(Chain);
3226 break;
3227 case NVPTXISD::Suld3DV2I32Trap:
3228 Opc = NVPTX::SULD_3D_V2I32_TRAP;
3229 Ops.push_back(TexHandle);
3230 Ops.push_back(N->getOperand(2));
3231 Ops.push_back(N->getOperand(3));
3232 Ops.push_back(N->getOperand(4));
3233 Ops.push_back(Chain);
3234 break;
3235 case NVPTXISD::Suld3DV4I8Trap:
3236 Opc = NVPTX::SULD_3D_V4I8_TRAP;
3237 Ops.push_back(TexHandle);
3238 Ops.push_back(N->getOperand(2));
3239 Ops.push_back(N->getOperand(3));
3240 Ops.push_back(N->getOperand(4));
3241 Ops.push_back(Chain);
3242 break;
3243 case NVPTXISD::Suld3DV4I16Trap:
3244 Opc = NVPTX::SULD_3D_V4I16_TRAP;
3245 Ops.push_back(TexHandle);
3246 Ops.push_back(N->getOperand(2));
3247 Ops.push_back(N->getOperand(3));
3248 Ops.push_back(N->getOperand(4));
3249 Ops.push_back(Chain);
3250 break;
3251 case NVPTXISD::Suld3DV4I32Trap:
3252 Opc = NVPTX::SULD_3D_V4I32_TRAP;
3253 Ops.push_back(TexHandle);
3254 Ops.push_back(N->getOperand(2));
3255 Ops.push_back(N->getOperand(3));
3256 Ops.push_back(N->getOperand(4));
3257 Ops.push_back(Chain);
3258 break;
3259 }
3260 Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3261 return Ret;
3262 }
3263
3264 /// SelectBFE - Look for instruction sequences that can be made more efficient
3265 /// by using the 'bfe' (bit-field extract) PTX instruction
SelectBFE(SDNode * N)3266 SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
3267 SDValue LHS = N->getOperand(0);
3268 SDValue RHS = N->getOperand(1);
3269 SDValue Len;
3270 SDValue Start;
3271 SDValue Val;
3272 bool IsSigned = false;
3273
3274 if (N->getOpcode() == ISD::AND) {
3275 // Canonicalize the operands
3276 // We want 'and %val, %mask'
3277 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
3278 std::swap(LHS, RHS);
3279 }
3280
3281 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
3282 if (!Mask) {
3283 // We need a constant mask on the RHS of the AND
3284 return NULL;
3285 }
3286
3287 // Extract the mask bits
3288 uint64_t MaskVal = Mask->getZExtValue();
3289 if (!isMask_64(MaskVal)) {
3290 // We *could* handle shifted masks here, but doing so would require an
3291 // 'and' operation to fix up the low-order bits so we would trade
3292 // shr+and for bfe+and, which has the same throughput
3293 return NULL;
3294 }
3295
3296 // How many bits are in our mask?
3297 uint64_t NumBits = CountTrailingOnes_64(MaskVal);
3298 Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
3299
3300 if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
3301 // We have a 'srl/and' pair, extract the effective start bit and length
3302 Val = LHS.getNode()->getOperand(0);
3303 Start = LHS.getNode()->getOperand(1);
3304 ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
3305 if (StartConst) {
3306 uint64_t StartVal = StartConst->getZExtValue();
3307 // How many "good" bits do we have left? "good" is defined here as bits
3308 // that exist in the original value, not shifted in.
3309 uint64_t GoodBits = Start.getValueType().getSizeInBits() - StartVal;
3310 if (NumBits > GoodBits) {
3311 // Do not handle the case where bits have been shifted in. In theory
3312 // we could handle this, but the cost is likely higher than just
3313 // emitting the srl/and pair.
3314 return NULL;
3315 }
3316 Start = CurDAG->getTargetConstant(StartVal, MVT::i32);
3317 } else {
3318 // Do not handle the case where the shift amount (can be zero if no srl
3319 // was found) is not constant. We could handle this case, but it would
3320 // require run-time logic that would be more expensive than just
3321 // emitting the srl/and pair.
3322 return NULL;
3323 }
3324 } else {
3325 // Do not handle the case where the LHS of the and is not a shift. While
3326 // it would be trivial to handle this case, it would just transform
3327 // 'and' -> 'bfe', but 'and' has higher-throughput.
3328 return NULL;
3329 }
3330 } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
3331 if (LHS->getOpcode() == ISD::AND) {
3332 ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
3333 if (!ShiftCnst) {
3334 // Shift amount must be constant
3335 return NULL;
3336 }
3337
3338 uint64_t ShiftAmt = ShiftCnst->getZExtValue();
3339
3340 SDValue AndLHS = LHS->getOperand(0);
3341 SDValue AndRHS = LHS->getOperand(1);
3342
3343 // Canonicalize the AND to have the mask on the RHS
3344 if (isa<ConstantSDNode>(AndLHS)) {
3345 std::swap(AndLHS, AndRHS);
3346 }
3347
3348 ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
3349 if (!MaskCnst) {
3350 // Mask must be constant
3351 return NULL;
3352 }
3353
3354 uint64_t MaskVal = MaskCnst->getZExtValue();
3355 uint64_t NumZeros;
3356 uint64_t NumBits;
3357 if (isMask_64(MaskVal)) {
3358 NumZeros = 0;
3359 // The number of bits in the result bitfield will be the number of
3360 // trailing ones (the AND) minus the number of bits we shift off
3361 NumBits = CountTrailingOnes_64(MaskVal) - ShiftAmt;
3362 } else if (isShiftedMask_64(MaskVal)) {
3363 NumZeros = countTrailingZeros(MaskVal);
3364 unsigned NumOnes = CountTrailingOnes_64(MaskVal >> NumZeros);
3365 // The number of bits in the result bitfield will be the number of
3366 // trailing zeros plus the number of set bits in the mask minus the
3367 // number of bits we shift off
3368 NumBits = NumZeros + NumOnes - ShiftAmt;
3369 } else {
3370 // This is not a mask we can handle
3371 return NULL;
3372 }
3373
3374 if (ShiftAmt < NumZeros) {
3375 // Handling this case would require extra logic that would make this
3376 // transformation non-profitable
3377 return NULL;
3378 }
3379
3380 Val = AndLHS;
3381 Start = CurDAG->getTargetConstant(ShiftAmt, MVT::i32);
3382 Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
3383 } else if (LHS->getOpcode() == ISD::SHL) {
3384 // Here, we have a pattern like:
3385 //
3386 // (sra (shl val, NN), MM)
3387 // or
3388 // (srl (shl val, NN), MM)
3389 //
3390 // If MM >= NN, we can efficiently optimize this with bfe
3391 Val = LHS->getOperand(0);
3392
3393 SDValue ShlRHS = LHS->getOperand(1);
3394 ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
3395 if (!ShlCnst) {
3396 // Shift amount must be constant
3397 return NULL;
3398 }
3399 uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
3400
3401 SDValue ShrRHS = RHS;
3402 ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
3403 if (!ShrCnst) {
3404 // Shift amount must be constant
3405 return NULL;
3406 }
3407 uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
3408
3409 // To avoid extra codegen and be profitable, we need Outer >= Inner
3410 if (OuterShiftAmt < InnerShiftAmt) {
3411 return NULL;
3412 }
3413
3414 // If the outer shift is more than the type size, we have no bitfield to
3415 // extract (since we also check that the inner shift is <= the outer shift
3416 // then this also implies that the inner shift is < the type size)
3417 if (OuterShiftAmt >= Val.getValueType().getSizeInBits()) {
3418 return NULL;
3419 }
3420
3421 Start =
3422 CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, MVT::i32);
3423 Len =
3424 CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() -
3425 OuterShiftAmt, MVT::i32);
3426
3427 if (N->getOpcode() == ISD::SRA) {
3428 // If we have a arithmetic right shift, we need to use the signed bfe
3429 // variant
3430 IsSigned = true;
3431 }
3432 } else {
3433 // No can do...
3434 return NULL;
3435 }
3436 } else {
3437 // No can do...
3438 return NULL;
3439 }
3440
3441
3442 unsigned Opc;
3443 // For the BFE operations we form here from "and" and "srl", always use the
3444 // unsigned variants.
3445 if (Val.getValueType() == MVT::i32) {
3446 if (IsSigned) {
3447 Opc = NVPTX::BFE_S32rii;
3448 } else {
3449 Opc = NVPTX::BFE_U32rii;
3450 }
3451 } else if (Val.getValueType() == MVT::i64) {
3452 if (IsSigned) {
3453 Opc = NVPTX::BFE_S64rii;
3454 } else {
3455 Opc = NVPTX::BFE_U64rii;
3456 }
3457 } else {
3458 // We cannot handle this type
3459 return NULL;
3460 }
3461
3462 SDValue Ops[] = {
3463 Val, Start, Len
3464 };
3465
3466 SDNode *Ret =
3467 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3468
3469 return Ret;
3470 }
3471
3472 // SelectDirectAddr - Match a direct address for DAG.
3473 // A direct address could be a globaladdress or externalsymbol.
SelectDirectAddr(SDValue N,SDValue & Address)3474 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
3475 // Return true if TGA or ES.
3476 if (N.getOpcode() == ISD::TargetGlobalAddress ||
3477 N.getOpcode() == ISD::TargetExternalSymbol) {
3478 Address = N;
3479 return true;
3480 }
3481 if (N.getOpcode() == NVPTXISD::Wrapper) {
3482 Address = N.getOperand(0);
3483 return true;
3484 }
3485 if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
3486 unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
3487 if (IID == Intrinsic::nvvm_ptr_gen_to_param)
3488 if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
3489 return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
3490 }
3491 return false;
3492 }
3493
3494 // symbol+offset
SelectADDRsi_imp(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset,MVT mvt)3495 bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
3496 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
3497 if (Addr.getOpcode() == ISD::ADD) {
3498 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
3499 SDValue base = Addr.getOperand(0);
3500 if (SelectDirectAddr(base, Base)) {
3501 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
3502 return true;
3503 }
3504 }
3505 }
3506 return false;
3507 }
3508
3509 // symbol+offset
SelectADDRsi(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset)3510 bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
3511 SDValue &Base, SDValue &Offset) {
3512 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
3513 }
3514
3515 // symbol+offset
SelectADDRsi64(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset)3516 bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
3517 SDValue &Base, SDValue &Offset) {
3518 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
3519 }
3520
3521 // register+offset
SelectADDRri_imp(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset,MVT mvt)3522 bool NVPTXDAGToDAGISel::SelectADDRri_imp(
3523 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
3524 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
3525 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
3526 Offset = CurDAG->getTargetConstant(0, mvt);
3527 return true;
3528 }
3529 if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
3530 Addr.getOpcode() == ISD::TargetGlobalAddress)
3531 return false; // direct calls.
3532
3533 if (Addr.getOpcode() == ISD::ADD) {
3534 if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
3535 return false;
3536 }
3537 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
3538 if (FrameIndexSDNode *FIN =
3539 dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
3540 // Constant offset from frame ref.
3541 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
3542 else
3543 Base = Addr.getOperand(0);
3544 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
3545 return true;
3546 }
3547 }
3548 return false;
3549 }
3550
3551 // register+offset
SelectADDRri(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset)3552 bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
3553 SDValue &Base, SDValue &Offset) {
3554 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
3555 }
3556
3557 // register+offset
SelectADDRri64(SDNode * OpNode,SDValue Addr,SDValue & Base,SDValue & Offset)3558 bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
3559 SDValue &Base, SDValue &Offset) {
3560 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
3561 }
3562
ChkMemSDNodeAddressSpace(SDNode * N,unsigned int spN) const3563 bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
3564 unsigned int spN) const {
3565 const Value *Src = nullptr;
3566 // Even though MemIntrinsicSDNode is a subclas of MemSDNode,
3567 // the classof() for MemSDNode does not include MemIntrinsicSDNode
3568 // (See SelectionDAGNodes.h). So we need to check for both.
3569 if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
3570 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
3571 return true;
3572 Src = mN->getMemOperand()->getValue();
3573 } else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
3574 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
3575 return true;
3576 Src = mN->getMemOperand()->getValue();
3577 }
3578 if (!Src)
3579 return false;
3580 if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
3581 return (PT->getAddressSpace() == spN);
3582 return false;
3583 }
3584
3585 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
3586 /// inline asm expressions.
SelectInlineAsmMemoryOperand(const SDValue & Op,char ConstraintCode,std::vector<SDValue> & OutOps)3587 bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
3588 const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
3589 SDValue Op0, Op1;
3590 switch (ConstraintCode) {
3591 default:
3592 return true;
3593 case 'm': // memory
3594 if (SelectDirectAddr(Op, Op0)) {
3595 OutOps.push_back(Op0);
3596 OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
3597 return false;
3598 }
3599 if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
3600 OutOps.push_back(Op0);
3601 OutOps.push_back(Op1);
3602 return false;
3603 }
3604 break;
3605 }
3606 return true;
3607 }
3608