• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
//===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the ARMSelectionDAGInfo class.
//
//===----------------------------------------------------------------------===//
13 
#include "ARMTargetMachine.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/DerivedTypes.h"
#include <cstring>
17 using namespace llvm;
18 
19 #define DEBUG_TYPE "arm-selectiondag-info"
20 
21 // Emit, if possible, a specialized version of the given Libcall. Typically this
22 // means selecting the appropriately aligned version, but we also convert memset
23 // of 0 into memclr.
24 SDValue ARMSelectionDAGInfo::
EmitSpecializedLibcall(SelectionDAG & DAG,SDLoc dl,SDValue Chain,SDValue Dst,SDValue Src,SDValue Size,unsigned Align,RTLIB::Libcall LC) const25 EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl,
26                        SDValue Chain,
27                        SDValue Dst, SDValue Src,
28                        SDValue Size, unsigned Align,
29                        RTLIB::Libcall LC) const {
30   const ARMSubtarget &Subtarget =
31       DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
32   const ARMTargetLowering *TLI = Subtarget.getTargetLowering();
33 
34   // Only use a specialized AEABI function if the default version of this
35   // Libcall is an AEABI function.
36   if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0)
37     return SDValue();
38 
39   // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be
40   // able to translate memset to memclr and use the value to index the function
41   // name array.
42   enum {
43     AEABI_MEMCPY = 0,
44     AEABI_MEMMOVE,
45     AEABI_MEMSET,
46     AEABI_MEMCLR
47   } AEABILibcall;
48   switch (LC) {
49   case RTLIB::MEMCPY:
50     AEABILibcall = AEABI_MEMCPY;
51     break;
52   case RTLIB::MEMMOVE:
53     AEABILibcall = AEABI_MEMMOVE;
54     break;
55   case RTLIB::MEMSET:
56     AEABILibcall = AEABI_MEMSET;
57     if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src))
58       if (ConstantSrc->getZExtValue() == 0)
59         AEABILibcall = AEABI_MEMCLR;
60     break;
61   default:
62     return SDValue();
63   }
64 
65   // Choose the most-aligned libcall variant that we can
66   enum {
67     ALIGN1 = 0,
68     ALIGN4,
69     ALIGN8
70   } AlignVariant;
71   if ((Align & 7) == 0)
72     AlignVariant = ALIGN8;
73   else if ((Align & 3) == 0)
74     AlignVariant = ALIGN4;
75   else
76     AlignVariant = ALIGN1;
77 
78   TargetLowering::ArgListTy Args;
79   TargetLowering::ArgListEntry Entry;
80   Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
81   Entry.Node = Dst;
82   Args.push_back(Entry);
83   if (AEABILibcall == AEABI_MEMCLR) {
84     Entry.Node = Size;
85     Args.push_back(Entry);
86   } else if (AEABILibcall == AEABI_MEMSET) {
87     // Adjust parameters for memset, EABI uses format (ptr, size, value),
88     // GNU library uses (ptr, value, size)
89     // See RTABI section 4.3.4
90     Entry.Node = Size;
91     Args.push_back(Entry);
92 
93     // Extend or truncate the argument to be an i32 value for the call.
94     if (Src.getValueType().bitsGT(MVT::i32))
95       Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
96     else if (Src.getValueType().bitsLT(MVT::i32))
97       Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
98 
99     Entry.Node = Src;
100     Entry.Ty = Type::getInt32Ty(*DAG.getContext());
101     Entry.isSExt = false;
102     Args.push_back(Entry);
103   } else {
104     Entry.Node = Src;
105     Args.push_back(Entry);
106 
107     Entry.Node = Size;
108     Args.push_back(Entry);
109   }
110 
111   char const *FunctionNames[4][3] = {
112     { "__aeabi_memcpy",  "__aeabi_memcpy4",  "__aeabi_memcpy8"  },
113     { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" },
114     { "__aeabi_memset",  "__aeabi_memset4",  "__aeabi_memset8"  },
115     { "__aeabi_memclr",  "__aeabi_memclr4",  "__aeabi_memclr8"  }
116   };
117   TargetLowering::CallLoweringInfo CLI(DAG);
118   CLI.setDebugLoc(dl)
119       .setChain(Chain)
120       .setCallee(
121            TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
122            DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
123                                  TLI->getPointerTy(DAG.getDataLayout())),
124            std::move(Args), 0)
125       .setDiscardResult();
126   std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
127 
128   return CallResult.second;
129 }
130 
131 SDValue
EmitTargetCodeForMemcpy(SelectionDAG & DAG,SDLoc dl,SDValue Chain,SDValue Dst,SDValue Src,SDValue Size,unsigned Align,bool isVolatile,bool AlwaysInline,MachinePointerInfo DstPtrInfo,MachinePointerInfo SrcPtrInfo) const132 ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
133                                              SDValue Chain,
134                                              SDValue Dst, SDValue Src,
135                                              SDValue Size, unsigned Align,
136                                              bool isVolatile, bool AlwaysInline,
137                                              MachinePointerInfo DstPtrInfo,
138                                           MachinePointerInfo SrcPtrInfo) const {
139   const ARMSubtarget &Subtarget =
140       DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
141   // Do repeated 4-byte loads and stores. To be improved.
142   // This requires 4-byte alignment.
143   if ((Align & 3) != 0)
144     return SDValue();
145   // This requires the copy size to be a constant, preferably
146   // within a subtarget-specific limit.
147   ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
148   if (!ConstantSize)
149     return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
150                                   RTLIB::MEMCPY);
151   uint64_t SizeVal = ConstantSize->getZExtValue();
152   if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
153     return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
154                                   RTLIB::MEMCPY);
155 
156   unsigned BytesLeft = SizeVal & 3;
157   unsigned NumMemOps = SizeVal >> 2;
158   unsigned EmittedNumMemOps = 0;
159   EVT VT = MVT::i32;
160   unsigned VTSize = 4;
161   unsigned i = 0;
162   // Emit a maximum of 4 loads in Thumb1 since we have fewer registers
163   const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6;
164   SDValue TFOps[6];
165   SDValue Loads[6];
166   uint64_t SrcOff = 0, DstOff = 0;
167 
168   // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to
169   // VLDM/VSTM and make this code emit it when appropriate. This would reduce
170   // pressure on the general purpose registers. However this seems harder to map
171   // onto the register allocator's view of the world.
172 
173   // The number of MEMCPY pseudo-instructions to emit. We use up to
174   // MaxLoadsInLDM registers per mcopy, which will get lowered into ldm/stm
175   // later on. This is a lower bound on the number of MEMCPY operations we must
176   // emit.
177   unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
178 
179   SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue);
180 
181   for (unsigned I = 0; I != NumMEMCPYs; ++I) {
182     // Evenly distribute registers among MEMCPY operations to reduce register
183     // pressure.
184     unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs;
185     unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps;
186 
187     Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src,
188                       DAG.getConstant(NumRegs, dl, MVT::i32));
189     Src = Dst.getValue(1);
190     Chain = Dst.getValue(2);
191 
192     DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize);
193     SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize);
194 
195     EmittedNumMemOps = NextEmittedNumMemOps;
196   }
197 
198   if (BytesLeft == 0)
199     return Chain;
200 
201   // Issue loads / stores for the trailing (1 - 3) bytes.
202   unsigned BytesLeftSave = BytesLeft;
203   i = 0;
204   while (BytesLeft) {
205     if (BytesLeft >= 2) {
206       VT = MVT::i16;
207       VTSize = 2;
208     } else {
209       VT = MVT::i8;
210       VTSize = 1;
211     }
212 
213     Loads[i] = DAG.getLoad(VT, dl, Chain,
214                            DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
215                                        DAG.getConstant(SrcOff, dl, MVT::i32)),
216                            SrcPtrInfo.getWithOffset(SrcOff),
217                            false, false, false, 0);
218     TFOps[i] = Loads[i].getValue(1);
219     ++i;
220     SrcOff += VTSize;
221     BytesLeft -= VTSize;
222   }
223   Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
224                       makeArrayRef(TFOps, i));
225 
226   i = 0;
227   BytesLeft = BytesLeftSave;
228   while (BytesLeft) {
229     if (BytesLeft >= 2) {
230       VT = MVT::i16;
231       VTSize = 2;
232     } else {
233       VT = MVT::i8;
234       VTSize = 1;
235     }
236 
237     TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
238                             DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
239                                         DAG.getConstant(DstOff, dl, MVT::i32)),
240                             DstPtrInfo.getWithOffset(DstOff), false, false, 0);
241     ++i;
242     DstOff += VTSize;
243     BytesLeft -= VTSize;
244   }
245   return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
246                      makeArrayRef(TFOps, i));
247 }
248 
249 
250 SDValue ARMSelectionDAGInfo::
EmitTargetCodeForMemmove(SelectionDAG & DAG,SDLoc dl,SDValue Chain,SDValue Dst,SDValue Src,SDValue Size,unsigned Align,bool isVolatile,MachinePointerInfo DstPtrInfo,MachinePointerInfo SrcPtrInfo) const251 EmitTargetCodeForMemmove(SelectionDAG &DAG, SDLoc dl,
252                          SDValue Chain,
253                          SDValue Dst, SDValue Src,
254                          SDValue Size, unsigned Align,
255                          bool isVolatile,
256                          MachinePointerInfo DstPtrInfo,
257                          MachinePointerInfo SrcPtrInfo) const {
258   return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
259                                 RTLIB::MEMMOVE);
260 }
261 
262 
263 SDValue ARMSelectionDAGInfo::
EmitTargetCodeForMemset(SelectionDAG & DAG,SDLoc dl,SDValue Chain,SDValue Dst,SDValue Src,SDValue Size,unsigned Align,bool isVolatile,MachinePointerInfo DstPtrInfo) const264 EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
265                         SDValue Chain, SDValue Dst,
266                         SDValue Src, SDValue Size,
267                         unsigned Align, bool isVolatile,
268                         MachinePointerInfo DstPtrInfo) const {
269   return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
270                                 RTLIB::MEMSET);
271 }
272