1 //===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the ARMSelectionDAGInfo class.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "ARMTargetMachine.h"
15 #include "llvm/CodeGen/SelectionDAG.h"
16 #include "llvm/IR/DerivedTypes.h"
17 using namespace llvm;
18
19 #define DEBUG_TYPE "arm-selectiondag-info"
20
21 // Emit, if possible, a specialized version of the given Libcall. Typically this
22 // means selecting the appropriately aligned version, but we also convert memset
23 // of 0 into memclr.
24 SDValue ARMSelectionDAGInfo::
EmitSpecializedLibcall(SelectionDAG & DAG,SDLoc dl,SDValue Chain,SDValue Dst,SDValue Src,SDValue Size,unsigned Align,RTLIB::Libcall LC) const25 EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl,
26 SDValue Chain,
27 SDValue Dst, SDValue Src,
28 SDValue Size, unsigned Align,
29 RTLIB::Libcall LC) const {
30 const ARMSubtarget &Subtarget =
31 DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
32 const ARMTargetLowering *TLI = Subtarget.getTargetLowering();
33
34 // Only use a specialized AEABI function if the default version of this
35 // Libcall is an AEABI function.
36 if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0)
37 return SDValue();
38
39 // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be
40 // able to translate memset to memclr and use the value to index the function
41 // name array.
42 enum {
43 AEABI_MEMCPY = 0,
44 AEABI_MEMMOVE,
45 AEABI_MEMSET,
46 AEABI_MEMCLR
47 } AEABILibcall;
48 switch (LC) {
49 case RTLIB::MEMCPY:
50 AEABILibcall = AEABI_MEMCPY;
51 break;
52 case RTLIB::MEMMOVE:
53 AEABILibcall = AEABI_MEMMOVE;
54 break;
55 case RTLIB::MEMSET:
56 AEABILibcall = AEABI_MEMSET;
57 if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src))
58 if (ConstantSrc->getZExtValue() == 0)
59 AEABILibcall = AEABI_MEMCLR;
60 break;
61 default:
62 return SDValue();
63 }
64
65 // Choose the most-aligned libcall variant that we can
66 enum {
67 ALIGN1 = 0,
68 ALIGN4,
69 ALIGN8
70 } AlignVariant;
71 if ((Align & 7) == 0)
72 AlignVariant = ALIGN8;
73 else if ((Align & 3) == 0)
74 AlignVariant = ALIGN4;
75 else
76 AlignVariant = ALIGN1;
77
78 TargetLowering::ArgListTy Args;
79 TargetLowering::ArgListEntry Entry;
80 Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
81 Entry.Node = Dst;
82 Args.push_back(Entry);
83 if (AEABILibcall == AEABI_MEMCLR) {
84 Entry.Node = Size;
85 Args.push_back(Entry);
86 } else if (AEABILibcall == AEABI_MEMSET) {
87 // Adjust parameters for memset, EABI uses format (ptr, size, value),
88 // GNU library uses (ptr, value, size)
89 // See RTABI section 4.3.4
90 Entry.Node = Size;
91 Args.push_back(Entry);
92
93 // Extend or truncate the argument to be an i32 value for the call.
94 if (Src.getValueType().bitsGT(MVT::i32))
95 Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
96 else if (Src.getValueType().bitsLT(MVT::i32))
97 Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
98
99 Entry.Node = Src;
100 Entry.Ty = Type::getInt32Ty(*DAG.getContext());
101 Entry.isSExt = false;
102 Args.push_back(Entry);
103 } else {
104 Entry.Node = Src;
105 Args.push_back(Entry);
106
107 Entry.Node = Size;
108 Args.push_back(Entry);
109 }
110
111 char const *FunctionNames[4][3] = {
112 { "__aeabi_memcpy", "__aeabi_memcpy4", "__aeabi_memcpy8" },
113 { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" },
114 { "__aeabi_memset", "__aeabi_memset4", "__aeabi_memset8" },
115 { "__aeabi_memclr", "__aeabi_memclr4", "__aeabi_memclr8" }
116 };
117 TargetLowering::CallLoweringInfo CLI(DAG);
118 CLI.setDebugLoc(dl)
119 .setChain(Chain)
120 .setCallee(
121 TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
122 DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
123 TLI->getPointerTy(DAG.getDataLayout())),
124 std::move(Args), 0)
125 .setDiscardResult();
126 std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
127
128 return CallResult.second;
129 }
130
131 SDValue
EmitTargetCodeForMemcpy(SelectionDAG & DAG,SDLoc dl,SDValue Chain,SDValue Dst,SDValue Src,SDValue Size,unsigned Align,bool isVolatile,bool AlwaysInline,MachinePointerInfo DstPtrInfo,MachinePointerInfo SrcPtrInfo) const132 ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
133 SDValue Chain,
134 SDValue Dst, SDValue Src,
135 SDValue Size, unsigned Align,
136 bool isVolatile, bool AlwaysInline,
137 MachinePointerInfo DstPtrInfo,
138 MachinePointerInfo SrcPtrInfo) const {
139 const ARMSubtarget &Subtarget =
140 DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
141 // Do repeated 4-byte loads and stores. To be improved.
142 // This requires 4-byte alignment.
143 if ((Align & 3) != 0)
144 return SDValue();
145 // This requires the copy size to be a constant, preferably
146 // within a subtarget-specific limit.
147 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
148 if (!ConstantSize)
149 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
150 RTLIB::MEMCPY);
151 uint64_t SizeVal = ConstantSize->getZExtValue();
152 if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
153 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
154 RTLIB::MEMCPY);
155
156 unsigned BytesLeft = SizeVal & 3;
157 unsigned NumMemOps = SizeVal >> 2;
158 unsigned EmittedNumMemOps = 0;
159 EVT VT = MVT::i32;
160 unsigned VTSize = 4;
161 unsigned i = 0;
162 // Emit a maximum of 4 loads in Thumb1 since we have fewer registers
163 const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6;
164 SDValue TFOps[6];
165 SDValue Loads[6];
166 uint64_t SrcOff = 0, DstOff = 0;
167
168 // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to
169 // VLDM/VSTM and make this code emit it when appropriate. This would reduce
170 // pressure on the general purpose registers. However this seems harder to map
171 // onto the register allocator's view of the world.
172
173 // The number of MEMCPY pseudo-instructions to emit. We use up to
174 // MaxLoadsInLDM registers per mcopy, which will get lowered into ldm/stm
175 // later on. This is a lower bound on the number of MEMCPY operations we must
176 // emit.
177 unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
178
179 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue);
180
181 for (unsigned I = 0; I != NumMEMCPYs; ++I) {
182 // Evenly distribute registers among MEMCPY operations to reduce register
183 // pressure.
184 unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs;
185 unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps;
186
187 Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src,
188 DAG.getConstant(NumRegs, dl, MVT::i32));
189 Src = Dst.getValue(1);
190 Chain = Dst.getValue(2);
191
192 DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize);
193 SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize);
194
195 EmittedNumMemOps = NextEmittedNumMemOps;
196 }
197
198 if (BytesLeft == 0)
199 return Chain;
200
201 // Issue loads / stores for the trailing (1 - 3) bytes.
202 unsigned BytesLeftSave = BytesLeft;
203 i = 0;
204 while (BytesLeft) {
205 if (BytesLeft >= 2) {
206 VT = MVT::i16;
207 VTSize = 2;
208 } else {
209 VT = MVT::i8;
210 VTSize = 1;
211 }
212
213 Loads[i] = DAG.getLoad(VT, dl, Chain,
214 DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
215 DAG.getConstant(SrcOff, dl, MVT::i32)),
216 SrcPtrInfo.getWithOffset(SrcOff),
217 false, false, false, 0);
218 TFOps[i] = Loads[i].getValue(1);
219 ++i;
220 SrcOff += VTSize;
221 BytesLeft -= VTSize;
222 }
223 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
224 makeArrayRef(TFOps, i));
225
226 i = 0;
227 BytesLeft = BytesLeftSave;
228 while (BytesLeft) {
229 if (BytesLeft >= 2) {
230 VT = MVT::i16;
231 VTSize = 2;
232 } else {
233 VT = MVT::i8;
234 VTSize = 1;
235 }
236
237 TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
238 DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
239 DAG.getConstant(DstOff, dl, MVT::i32)),
240 DstPtrInfo.getWithOffset(DstOff), false, false, 0);
241 ++i;
242 DstOff += VTSize;
243 BytesLeft -= VTSize;
244 }
245 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
246 makeArrayRef(TFOps, i));
247 }
248
249
250 SDValue ARMSelectionDAGInfo::
EmitTargetCodeForMemmove(SelectionDAG & DAG,SDLoc dl,SDValue Chain,SDValue Dst,SDValue Src,SDValue Size,unsigned Align,bool isVolatile,MachinePointerInfo DstPtrInfo,MachinePointerInfo SrcPtrInfo) const251 EmitTargetCodeForMemmove(SelectionDAG &DAG, SDLoc dl,
252 SDValue Chain,
253 SDValue Dst, SDValue Src,
254 SDValue Size, unsigned Align,
255 bool isVolatile,
256 MachinePointerInfo DstPtrInfo,
257 MachinePointerInfo SrcPtrInfo) const {
258 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
259 RTLIB::MEMMOVE);
260 }
261
262
263 SDValue ARMSelectionDAGInfo::
EmitTargetCodeForMemset(SelectionDAG & DAG,SDLoc dl,SDValue Chain,SDValue Dst,SDValue Src,SDValue Size,unsigned Align,bool isVolatile,MachinePointerInfo DstPtrInfo) const264 EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
265 SDValue Chain, SDValue Dst,
266 SDValue Src, SDValue Size,
267 unsigned Align, bool isVolatile,
268 MachinePointerInfo DstPtrInfo) const {
269 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
270 RTLIB::MEMSET);
271 }
272