1 //===-- PTXSelectionDAGInfo.cpp - PTX SelectionDAG Info -------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the PTXSelectionDAGInfo class.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #define DEBUG_TYPE "ptx-selectiondag-info"
15 #include "PTXTargetMachine.h"
16 #include "llvm/DerivedTypes.h"
17 #include "llvm/CodeGen/SelectionDAG.h"
18 using namespace llvm;
19
PTXSelectionDAGInfo(const TargetMachine & TM)20 PTXSelectionDAGInfo::PTXSelectionDAGInfo(const TargetMachine &TM)
21 : TargetSelectionDAGInfo(TM),
22 Subtarget(&TM.getSubtarget<PTXSubtarget>()) {
23 }
24
~PTXSelectionDAGInfo()25 PTXSelectionDAGInfo::~PTXSelectionDAGInfo() {
26 }
27
28 SDValue
EmitTargetCodeForMemcpy(SelectionDAG & DAG,DebugLoc dl,SDValue Chain,SDValue Dst,SDValue Src,SDValue Size,unsigned Align,bool isVolatile,bool AlwaysInline,MachinePointerInfo DstPtrInfo,MachinePointerInfo SrcPtrInfo) const29 PTXSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
30 SDValue Chain,
31 SDValue Dst, SDValue Src,
32 SDValue Size, unsigned Align,
33 bool isVolatile, bool AlwaysInline,
34 MachinePointerInfo DstPtrInfo,
35 MachinePointerInfo SrcPtrInfo) const {
36 // Do repeated 4-byte loads and stores. To be improved.
37 // This requires 4-byte alignment.
38 if ((Align & 3) != 0)
39 return SDValue();
40 // This requires the copy size to be a constant, preferably
41 // within a subtarget-specific limit.
42 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
43 if (!ConstantSize)
44 return SDValue();
45 uint64_t SizeVal = ConstantSize->getZExtValue();
46 // Always inline memcpys. In PTX, we do not have a C library that provides
47 // a memcpy function.
48 //if (!AlwaysInline)
49 // return SDValue();
50
51 unsigned BytesLeft = SizeVal & 3;
52 unsigned NumMemOps = SizeVal >> 2;
53 unsigned EmittedNumMemOps = 0;
54 EVT VT = MVT::i32;
55 unsigned VTSize = 4;
56 unsigned i = 0;
57 const unsigned MAX_LOADS_IN_LDM = 6;
58 SDValue TFOps[MAX_LOADS_IN_LDM];
59 SDValue Loads[MAX_LOADS_IN_LDM];
60 uint64_t SrcOff = 0, DstOff = 0;
61 EVT PointerType = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
62
63 // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
64 // same number of stores. The loads and stores will get combined into
65 // ldm/stm later on.
66 while (EmittedNumMemOps < NumMemOps) {
67 for (i = 0;
68 i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
69 Loads[i] = DAG.getLoad(VT, dl, Chain,
70 DAG.getNode(ISD::ADD, dl, PointerType, Src,
71 DAG.getConstant(SrcOff, PointerType)),
72 SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
73 false, 0);
74 TFOps[i] = Loads[i].getValue(1);
75 SrcOff += VTSize;
76 }
77 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
78
79 for (i = 0;
80 i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
81 TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
82 DAG.getNode(ISD::ADD, dl, PointerType, Dst,
83 DAG.getConstant(DstOff, PointerType)),
84 DstPtrInfo.getWithOffset(DstOff),
85 isVolatile, false, 0);
86 DstOff += VTSize;
87 }
88 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
89
90 EmittedNumMemOps += i;
91 }
92
93 if (BytesLeft == 0)
94 return Chain;
95
96 // Issue loads / stores for the trailing (1 - 3) bytes.
97 unsigned BytesLeftSave = BytesLeft;
98 i = 0;
99 while (BytesLeft) {
100 if (BytesLeft >= 2) {
101 VT = MVT::i16;
102 VTSize = 2;
103 } else {
104 VT = MVT::i8;
105 VTSize = 1;
106 }
107
108 Loads[i] = DAG.getLoad(VT, dl, Chain,
109 DAG.getNode(ISD::ADD, dl, PointerType, Src,
110 DAG.getConstant(SrcOff, PointerType)),
111 SrcPtrInfo.getWithOffset(SrcOff), false, false, 0);
112 TFOps[i] = Loads[i].getValue(1);
113 ++i;
114 SrcOff += VTSize;
115 BytesLeft -= VTSize;
116 }
117 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
118
119 i = 0;
120 BytesLeft = BytesLeftSave;
121 while (BytesLeft) {
122 if (BytesLeft >= 2) {
123 VT = MVT::i16;
124 VTSize = 2;
125 } else {
126 VT = MVT::i8;
127 VTSize = 1;
128 }
129
130 TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
131 DAG.getNode(ISD::ADD, dl, PointerType, Dst,
132 DAG.getConstant(DstOff, PointerType)),
133 DstPtrInfo.getWithOffset(DstOff), false, false, 0);
134 ++i;
135 DstOff += VTSize;
136 BytesLeft -= VTSize;
137 }
138 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
139 }
140
141 SDValue PTXSelectionDAGInfo::
EmitTargetCodeForMemset(SelectionDAG & DAG,DebugLoc dl,SDValue Chain,SDValue Dst,SDValue Src,SDValue Size,unsigned Align,bool isVolatile,MachinePointerInfo DstPtrInfo) const142 EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
143 SDValue Chain, SDValue Dst,
144 SDValue Src, SDValue Size,
145 unsigned Align, bool isVolatile,
146 MachinePointerInfo DstPtrInfo) const {
147 llvm_unreachable("memset lowering not implemented for PTX yet");
148 }
149
150