//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameLowering.h"
#include "SPUMachineFunction.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/CallingConv.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <map>

using namespace llvm;

// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  // Byte offset of the preferred slot (counted from the MSB)
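  // For example, an i32 lives in bytes 0-3 of the 16-byte register
  // (offset 0), an i16 in bytes 2-3 (offset 2), and an i8 in byte 3
  // (offset 3), matching the values returned below.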
  int prefslotOffset(EVT VT) {
    int retval = 0;
    if (VT == MVT::i1) retval = 3;
    if (VT == MVT::i8) retval = 3;
    if (VT == MVT::i16) retval = 2;

    return retval;
  }

  //! Expand a library call into an actual call DAG node
  /*!
   \note
   This code is taken from SelectionDAGLegalize, since it is not exposed as
   part of the LLVM SelectionDAG API.
   */

  SDValue
  ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
                bool isSigned, SDValue &Hi, const SPUTargetLowering &TLI) {
    // The input chain to this libcall is the entry node of the function.
    // Legalizing the call will automatically add the previous call to the
    // dependence.
    SDValue InChain = DAG.getEntryNode();

    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry Entry;
    for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
      EVT ArgVT = Op.getOperand(i).getValueType();
      Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
      Entry.Node = Op.getOperand(i);
      Entry.Ty = ArgTy;
      Entry.isSExt = isSigned;
      Entry.isZExt = !isSigned;
      Args.push_back(Entry);
    }
    SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
                                           TLI.getPointerTy());

    // Splice the libcall in wherever FindInputOutputChains tells us to.
    Type *RetTy =
      Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
    std::pair<SDValue, SDValue> CallInfo =
      TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
                      0, TLI.getLibcallCallingConv(LC), false,
                      /*isReturnValueUsed=*/true,
                      Callee, Args, DAG, Op.getDebugLoc());

    return CallInfo.first;
  }
}

SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM, new TargetLoweringObjectFileELF()),
    SPUTM(TM) {

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set RTLIB libcall names as used by SPU:
  setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");

  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);

  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);

  setTruncStoreAction(MVT::i128, MVT::i64, Expand);
  setTruncStoreAction(MVT::i128, MVT::i32, Expand);
  setTruncStoreAction(MVT::i128, MVT::i16, Expand);
  setTruncStoreAction(MVT::i128, MVT::i8,  Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD,   VT, Custom);
    setOperationAction(ISD::STORE,  VT, Custom);
    setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
      MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;

    setOperationAction(ISD::LOAD,  VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
      MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);

  // Custom lower SELECT_CC for most cases, but expand by default
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8,    Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i16,   Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32,   Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i64,   Custom);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER,   MVT::Other, Expand);
  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);

  // SPU has no division/remainder instructions
  setOperationAction(ISD::SREM,    MVT::i8,   Expand);
  setOperationAction(ISD::UREM,    MVT::i8,   Expand);
  setOperationAction(ISD::SDIV,    MVT::i8,   Expand);
  setOperationAction(ISD::UDIV,    MVT::i8,   Expand);
  setOperationAction(ISD::SDIVREM, MVT::i8,   Expand);
  setOperationAction(ISD::UDIVREM, MVT::i8,   Expand);
  setOperationAction(ISD::SREM,    MVT::i16,  Expand);
  setOperationAction(ISD::UREM,    MVT::i16,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i16,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i16,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i16,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i16,  Expand);
  setOperationAction(ISD::SREM,    MVT::i32,  Expand);
  setOperationAction(ISD::UREM,    MVT::i32,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i32,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i32,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32,  Expand);
  setOperationAction(ISD::SREM,    MVT::i64,  Expand);
  setOperationAction(ISD::UREM,    MVT::i64,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i64,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i64,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64,  Expand);
  setOperationAction(ISD::SREM,    MVT::i128, Expand);
  setOperationAction(ISD::UREM,    MVT::i128, Expand);
  setOperationAction(ISD::SDIV,    MVT::i128, Expand);
  setOperationAction(ISD::UDIV,    MVT::i128, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i128, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);

  // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
  // for f32!)
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FMA, MVT::f64, Expand);
  setOperationAction(ISD::FMA, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported in
  //        .td files.
  setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8,  Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8,  Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // Make these operations legal and handle them during instruction selection:
  setOperationAction(ISD::SHL, MVT::i64, Legal);
  setOperationAction(ISD::SRL, MVT::i64, Legal);
  setOperationAction(ISD::SRA, MVT::i64, Legal);

  // Custom lower i8 multiplications; i32 and i64 multiplications are legal
  setOperationAction(ISD::MUL, MVT::i8,  Custom);
  setOperationAction(ISD::MUL, MVT::i32, Legal);
  setOperationAction(ISD::MUL, MVT::i64, Legal);

  // Expand double-width multiplication
  // FIXME: It would probably be reasonable to support some of these operations
  setOperationAction(ISD::UMUL_LOHI, MVT::i8,  Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i8,  Expand);
  setOperationAction(ISD::MULHU,     MVT::i8,  Expand);
  setOperationAction(ISD::MULHS,     MVT::i8,  Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::MULHU,     MVT::i16, Expand);
  setOperationAction(ISD::MULHS,     MVT::i16, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::MULHU,     MVT::i32, Expand);
  setOperationAction(ISD::MULHS,     MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::MULHU,     MVT::i64, Expand);
  setOperationAction(ISD::MULHS,     MVT::i64, Expand);

  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD, MVT::i8,  Custom);
  setOperationAction(ISD::ADD, MVT::i64, Legal);
  setOperationAction(ISD::SUB, MVT::i8,  Custom);
  setOperationAction(ISD::SUB, MVT::i64, Legal);

  // SPU does not have BSWAP, but it does support CTLZ for i32.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8,   Custom);
  setOperationAction(ISD::CTPOP, MVT::i16,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i32,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i64,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i128, Expand);

  setOperationAction(ISD::CTTZ, MVT::i8,   Expand);
  setOperationAction(ISD::CTTZ, MVT::i16,  Expand);
  setOperationAction(ISD::CTTZ, MVT::i32,  Expand);
  setOperationAction(ISD::CTTZ, MVT::i64,  Expand);
  setOperationAction(ISD::CTTZ, MVT::i128, Expand);

  setOperationAction(ISD::CTLZ, MVT::i8,   Promote);
  setOperationAction(ISD::CTLZ, MVT::i16,  Promote);
  setOperationAction(ISD::CTLZ, MVT::i32,  Legal);
  setOperationAction(ISD::CTLZ, MVT::i64,  Expand);
  setOperationAction(ISD::CTLZ, MVT::i128, Expand);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
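  // (this maps onto the selb instruction, whose third operand is the
  // bitwise select mask)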
  setOperationAction(ISD::SELECT, MVT::i8,  Legal);
  setOperationAction(ISD::SELECT, MVT::i16, Legal);
  setOperationAction(ISD::SELECT, MVT::i32, Legal);
  setOperationAction(ISD::SELECT, MVT::i64, Legal);

  setOperationAction(ISD::SETCC, MVT::i8,  Legal);
  setOperationAction(ISD::SETCC, MVT::i16, Legal);
  setOperationAction(ISD::SETCC, MVT::i32, Legal);
  setOperationAction(ISD::SETCC, MVT::i64, Legal);
  setOperationAction(ISD::SETCC, MVT::f64, Custom);

  // Custom lower i128 -> i64 truncates
  setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);

  // Custom lower i32/i64 -> i128 sign extend
  setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);

  setOperationAction(ISD::FP_TO_SINT, MVT::i8,  Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i8,  Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
  // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
  // to expand to a libcall, hence the custom lowering:
  setOperationAction(ISD::FP_TO_SINT, MVT::i32,  Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32,  Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64,  Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64,  Expand);
  setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);

  // FDIV on SPU has to be expanded to a libcall for f64
  setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall

  // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BITCAST, MVT::i32, Legal);
  setOperationAction(ISD::BITCAST, MVT::f32, Legal);
  setOperationAction(ISD::BITCAST, MVT::i64, Legal);
  setOperationAction(ISD::BITCAST, MVT::f64, Legal);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::ConstantPool,  VT, Custom);
    setOperationAction(ISD::JumpTable,     VT, Custom);
  }

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG,              MVT::Other, Expand);
  setOperationAction(ISD::VACOPY,             MVT::Other, Expand);
  setOperationAction(ISD::VAEND,              MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32,   Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64,   Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)i;

    // Set operation actions to legal types only.
    if (!isTypeLegal(VT)) continue;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD, VT, Legal);
    setOperationAction(ISD::SUB, VT, Legal);
    // mul is legal for the supported vector types as well.
    setOperationAction(ISD::MUL, VT, Legal);

    setOperationAction(ISD::AND,    VT, Legal);
    setOperationAction(ISD::OR,     VT, Legal);
    setOperationAction(ISD::XOR,    VT, Legal);
    setOperationAction(ISD::LOAD,   VT, Custom);
    setOperationAction(ISD::SELECT, VT, Legal);
    setOperationAction(ISD::STORE,  VT, Custom);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
    setOperationAction(ISD::ConstantPool,       VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
  }

  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setOperationAction(ISD::FDIV, MVT::v4f32, Legal);

  setBooleanContents(ZeroOrNegativeOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // FIXME: Is this correct?

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  setMinFunctionAlignment(3);
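  // (setMinFunctionAlignment() takes a log2 value, so this aligns
  // functions to 8 bytes)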

  computeRegisterProperties();

  // Set pre-RA register scheduler default to BURR, which produces slightly
  // better code than the default (could also be TDRR, but TargetLowering.h
  // needs a mod to support that model):
  setSchedulingPreference(Sched::RegPressure);
}

const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
    node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
    node_names[(unsigned) SPUISD::SHL_BITS] = "SPUISD::SHL_BITS";
    node_names[(unsigned) SPUISD::SHL_BYTES] = "SPUISD::SHL_BYTES";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
      "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
    node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
    node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}

//===----------------------------------------------------------------------===//
// Return the Cell SPU's SETCC result type
//===----------------------------------------------------------------------===//

EVT SPUTargetLowering::getSetCCResultType(EVT VT) const {
  // i8, i16 and i32 are valid SETCC result types
  MVT::SimpleValueType retval;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::i1:
  case MVT::i8:
    retval = MVT::i8; break;
  case MVT::i16:
    retval = MVT::i16; break;
  case MVT::i32:
  default:
    retval = MVT::i32;
  }
  return retval;
}

//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.

 For extending loads, we also want to ensure that the following sequence is
 emitted, e.g. for MVT::f32 extending load to MVT::f64:

 \verbatim
 %1  v16i8,ch = load
 %2  v16i8,ch = rotate %1
 %3  v4f32,ch = bitconvert %2
 %4  f32 = vec2prefslot %3
 %5  f64 = fp_extend %4
 \endverbatim
*/
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  EVT InVT = LN->getMemoryVT();
  EVT OutVT = Op.getValueType();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  int pso = prefslotOffset(InVT);
  DebugLoc dl = Op.getDebugLoc();
  EVT vecVT = InVT.isVector()? InVT: EVT::getVectorVT(*DAG.getContext(), InVT,
                                                      (128 / InVT.getSizeInBits()));
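  // e.g., a scalar i32 load gets v4i32 here (128/32 == 4 lanes); vector
  // memory types pass through unchanged.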

  // two sanity checks
  assert(LN->getAddressingMode() == ISD::UNINDEXED
         && "we should get only UNINDEXED addresses");
  // clean aligned loads can be selected as-is
  if (InVT.getSizeInBits() == 128 && (alignment%16) == 0)
    return SDValue();

  // Get pointerinfos to the memory chunk(s) that contain the data to load
  uint64_t mpi_offset = LN->getPointerInfo().Offset;
  mpi_offset -= mpi_offset%16;
  MachinePointerInfo lowMemPtr(LN->getPointerInfo().V, mpi_offset);
  MachinePointerInfo highMemPtr(LN->getPointerInfo().V, mpi_offset+16);

  SDValue result;
  SDValue basePtr = LN->getBasePtr();
  SDValue rotate;

  if ((alignment%16) == 0) {
    ConstantSDNode *CN;

    // Special cases for a known aligned load to simplify the base pointer
    // and the rotation amount:
    if (basePtr.getOpcode() == ISD::ADD
        && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
      // Known offset into basePtr
      int64_t offset = CN->getSExtValue();
      int64_t rotamt = int64_t((offset & 0xf) - pso);

      if (rotamt < 0)
        rotamt += 16;

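      // Worked example: an i16 load at quadword offset 6 rotates left by
      // (6 & 0xf) - 2 == 4 bytes, landing the halfword in its preferred
      // slot (bytes 2-3).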
      rotate = DAG.getConstant(rotamt, MVT::i16);

      // Simplify the base pointer for this case:
      basePtr = basePtr.getOperand(0);
      if ((offset & ~0xf) > 0) {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant((offset & ~0xf), PtrVT));
      }
    } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
               || (basePtr.getOpcode() == SPUISD::IndirectAddr
                   && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
                   && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
      // Plain aligned a-form address: rotate into preferred slot
      // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
      int64_t rotamt = -pso;
      if (rotamt < 0)
        rotamt += 16;
      rotate = DAG.getConstant(rotamt, MVT::i16);
    } else {
      // Offset the rotate amount by the basePtr and the preferred slot
      // byte offset
      int64_t rotamt = -pso;
      if (rotamt < 0)
        rotamt += 16;
      rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                           basePtr,
                           DAG.getConstant(rotamt, PtrVT));
    }
  } else {
    // Unaligned load: must be more pessimistic about addressing modes:
    if (basePtr.getOpcode() == ISD::ADD) {
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &RegInfo = MF.getRegInfo();
      unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
      SDValue Flag;

      SDValue Op0 = basePtr.getOperand(0);
      SDValue Op1 = basePtr.getOperand(1);

      if (isa<ConstantSDNode>(Op1)) {
        // Convert the (add <ptr>, <const>) to an indirect address contained
        // in a register. Note that this is done because we need to avoid
        // creating a 0(reg) d-form address due to the SPU's block loads.
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
        basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
      } else {
        // Convert the (add <arg1>, <arg2>) to an indirect address, which
        // will likely be lowered as a reg(reg) x-form address.
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
      }
    } else {
      basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                            basePtr,
                            DAG.getConstant(0, PtrVT));
    }

    // Offset the rotate amount by the basePtr and the preferred slot
    // byte offset
    rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                         basePtr,
                         DAG.getConstant(-pso, PtrVT));
  }

  // Do the load as an i128 to allow possible shifting
  SDValue low = DAG.getLoad(MVT::i128, dl, the_chain, basePtr,
                            lowMemPtr,
                            LN->isVolatile(), LN->isNonTemporal(), 16);

  // When the size is not greater than the alignment, we get all the data
  // with just one load
  if (alignment >= InVT.getSizeInBits()/8) {
    // Update the chain
    the_chain = low.getValue(1);

    // Rotate into the preferred slot:
    result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::i128,
                         low.getValue(0), rotate);

    // Convert the loaded v16i8 vector to the appropriate vector type
    // specified by the operand:
    EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
                                 InVT, (128 / InVT.getSizeInBits()));
    result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
                         DAG.getNode(ISD::BITCAST, dl, vecVT, result));
  }
  // When the alignment is less than the size, we might need two loads
  // (known only at run time)
  // TODO: if the memory address is composed only from constants, we have
  //       extra knowledge, and might avoid the second load
  else {
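    // Strategy: load both quadwords that can contain the value, shift the
    // low one left by the byte offset and the high one right by
    // (16 - offset), then OR the halves together. e.g., an i64 at offset 12
    // spans bytes 12-19: low << 12 supplies bytes 12-15, high >> 4 supplies
    // bytes 16-19.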
    // storage position offset from lower 16-byte aligned memory chunk
    SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
                                 basePtr, DAG.getConstant(0xf, MVT::i32));
    // Get a register full of ones. (This implementation is a workaround:
    // LLVM cannot handle 128-bit signed int constants.)
    SDValue ones = DAG.getConstant(-1, MVT::v4i32);
    ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);

    SDValue high = DAG.getLoad(MVT::i128, dl, the_chain,
                               DAG.getNode(ISD::ADD, dl, PtrVT,
                                           basePtr,
                                           DAG.getConstant(16, PtrVT)),
                               highMemPtr,
                               LN->isVolatile(), LN->isNonTemporal(), 16);

    the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
                            high.getValue(1));

    // Shift the (possible) high part right to compensate for the
    // misalignment. If there is no high part (i.e. the value is i64 and the
    // offset is 4), this will zero out the high value.
    high = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, high,
                       DAG.getNode(ISD::SUB, dl, MVT::i32,
                                   DAG.getConstant(16, MVT::i32),
                                   offset));

    // Shift the low similarly
    // TODO: add SPUISD::SHL_BYTES
    low = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, low, offset);

    // Merge the two parts
    result = DAG.getNode(ISD::BITCAST, dl, vecVT,
                         DAG.getNode(ISD::OR, dl, MVT::i128, low, high));

    if (!InVT.isVector()) {
      result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT, result);
    }

  }
  // Handle extending loads by extending the scalar result:
  if (ExtType == ISD::SEXTLOAD) {
    result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
  } else if (ExtType == ISD::ZEXTLOAD) {
    result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
  } else if (ExtType == ISD::EXTLOAD) {
    unsigned NewOpc = ISD::ANY_EXTEND;

    if (OutVT.isFloatingPoint())
      NewOpc = ISD::FP_EXTEND;

    result = DAG.getNode(NewOpc, dl, OutVT, result);
  }

  SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
  SDValue retops[2] = {
    result,
    the_chain
  };

  result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
                       retops, sizeof(retops) / sizeof(retops[0]));
  return result;
}

/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
*/
static SDValue
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDValue Value = SN->getValue();
  EVT VT = Value.getValueType();
  EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  unsigned alignment = SN->getAlignment();
  SDValue result;
  EVT vecVT = StVT.isVector()? StVT: EVT::getVectorVT(*DAG.getContext(), StVT,
                                                      (128 / StVT.getSizeInBits()));
  // Get pointerinfos to the memory chunk(s) that contain the data to store
  uint64_t mpi_offset = SN->getPointerInfo().Offset;
  mpi_offset -= mpi_offset%16;
  MachinePointerInfo lowMemPtr(SN->getPointerInfo().V, mpi_offset);
  MachinePointerInfo highMemPtr(SN->getPointerInfo().V, mpi_offset+16);

  // two sanity checks
  assert(SN->getAddressingMode() == ISD::UNINDEXED
         && "we should get only UNINDEXED addresses");
  // clean aligned stores can be selected as-is
  if (StVT.getSizeInBits() == 128 && (alignment%16) == 0)
    return SDValue();

  SDValue alignLoadVec;
  SDValue basePtr = SN->getBasePtr();
  SDValue the_chain = SN->getChain();
  SDValue insertEltOffs;

  if ((alignment%16) == 0) {
    ConstantSDNode *CN;
    // Special cases for a known aligned store to simplify the base pointer
    // and insertion byte:
    if (basePtr.getOpcode() == ISD::ADD
        && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
      // Known offset into basePtr
      int64_t offset = CN->getSExtValue();

      // Simplify the base pointer for this case:
      basePtr = basePtr.getOperand(0);
      insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                  basePtr,
                                  DAG.getConstant((offset & 0xf), PtrVT));

      if ((offset & ~0xf) > 0) {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant((offset & ~0xf), PtrVT));
      }
    } else {
      // Otherwise, assume it's at byte 0 of basePtr
      insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                  basePtr,
                                  DAG.getConstant(0, PtrVT));
      basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                            basePtr,
                            DAG.getConstant(0, PtrVT));
    }
  } else {
    // Unaligned store: must be more pessimistic about addressing modes:
    if (basePtr.getOpcode() == ISD::ADD) {
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &RegInfo = MF.getRegInfo();
      unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
      SDValue Flag;

      SDValue Op0 = basePtr.getOperand(0);
      SDValue Op1 = basePtr.getOperand(1);

      if (isa<ConstantSDNode>(Op1)) {
        // Convert the (add <ptr>, <const>) to an indirect address contained
        // in a register. Note that this is done because we need to avoid
        // creating a 0(reg) d-form address due to the SPU's block loads.
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
        basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
      } else {
        // Convert the (add <arg1>, <arg2>) to an indirect address, which
        // will likely be lowered as a reg(reg) x-form address.
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
      }
    } else {
      basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                            basePtr,
                            DAG.getConstant(0, PtrVT));
    }

    // Insertion point is solely determined by basePtr's contents
    insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
                                basePtr,
                                DAG.getConstant(0, PtrVT));
  }

  // Load the lower part of the memory to which to store.
  SDValue low = DAG.getLoad(vecVT, dl, the_chain, basePtr,
                            lowMemPtr, SN->isVolatile(), SN->isNonTemporal(),
                            16);

  // If we don't need to store across the 16-byte boundary, one store suffices
  if (alignment >= StVT.getSizeInBits()/8) {
    // Update the chain
    the_chain = low.getValue(1);

    LoadSDNode *LN = cast<LoadSDNode>(low);
    SDValue theValue = SN->getValue();

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
#if !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      errs() << "CellSPU LowerSTORE: basePtr = ";
      basePtr.getNode()->dump(&DAG);
      errs() << "\n";
    }
#endif

    SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT,
                                      insertEltOffs);
    SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT,
                                      theValue);

    result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
                         vectorizeOp, low,
                         DAG.getNode(ISD::BITCAST, dl,
                                     MVT::v4i32, insertEltOp));

    result = DAG.getStore(the_chain, dl, result, basePtr,
                          lowMemPtr,
                          LN->isVolatile(), LN->isNonTemporal(),
                          16);

  }
  // Do the store when it might cross the 16-byte memory access boundary.
  else {
    // TODO: issue a warning if SN->isVolatile() == true? This is likely not
    //       what the user wanted.

    // address offset from the nearest lower 16-byte aligned address
    SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
                                 SN->getBasePtr(),
                                 DAG.getConstant(0xf, MVT::i32));
    // 16 - offset
    SDValue offset_compl = DAG.getNode(ISD::SUB, dl, MVT::i32,
                                       DAG.getConstant(16, MVT::i32),
                                       offset);
    // 16 - sizeof(Value)
    SDValue surplus = DAG.getNode(ISD::SUB, dl, MVT::i32,
                                  DAG.getConstant(16, MVT::i32),
                                  DAG.getConstant(VT.getSizeInBits()/8,
                                                  MVT::i32));
    // get a register full of ones
    SDValue ones = DAG.getConstant(-1, MVT::v4i32);
    ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);

    // Create the 128-bit masks that have ones where the data to store is
    // located.
    SDValue lowmask, himask;
    // If the value to store doesn't fill an entire 128 bits, zero out the
    // remaining bits of the mask so that only the value we want to store
    // is masked in.
    // This happens e.g. in the case of a store of i32 with align 2.
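    // Worked example for a scalar i32 stored at quadword offset 14:
    // surplus == 12, so lowmask is first narrowed to ones in bytes 0-3
    // (where the value sits after PREFSLOT2VEC). SRL_BYTES by offset == 14
    // then leaves ones in bytes 14-15 (the low-quadword part), while
    // SHL_BYTES by offset_compl == 2 leaves ones in bytes 0-1 of the
    // high-quadword mask.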
    if (!VT.isVector()) {
      Value = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, Value);
      lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, ones, surplus);
      lowmask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
                            surplus);
      Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
      Value = DAG.getNode(ISD::AND, dl, MVT::i128, Value, lowmask);
    } else {
      lowmask = ones;
      Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
    }
    // This will be zero if no data goes to the high quadword.
    himask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
                         offset_compl);
    lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, lowmask,
                          offset);

    // Load in the old data and zero out the parts that will be overwritten
    // with the new data to store.
    SDValue hi = DAG.getLoad(MVT::i128, dl, the_chain,
                             DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
                                         DAG.getConstant(16, PtrVT)),
                             highMemPtr,
                             SN->isVolatile(), SN->isNonTemporal(), 16);
    the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
                            hi.getValue(1));

    low = DAG.getNode(ISD::AND, dl, MVT::i128,
                      DAG.getNode(ISD::BITCAST, dl, MVT::i128, low),
                      DAG.getNode(ISD::XOR, dl, MVT::i128, lowmask, ones));
    hi = DAG.getNode(ISD::AND, dl, MVT::i128,
                     DAG.getNode(ISD::BITCAST, dl, MVT::i128, hi),
                     DAG.getNode(ISD::XOR, dl, MVT::i128, himask, ones));

    // Shift the Value to store into place. rlow contains the parts that go to
    // the lower memory chunk, rhi has the parts that go to the upper one.
    SDValue rlow = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, Value, offset);
    rlow = DAG.getNode(ISD::AND, dl, MVT::i128, rlow, lowmask);
    SDValue rhi = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, Value,
                              offset_compl);

    // Merge the old data and the new data and store the results
    // (need to convert vectors to integer here, as OR'ing floats asserts)
    rlow = DAG.getNode(ISD::OR, dl, MVT::i128,
                       DAG.getNode(ISD::BITCAST, dl, MVT::i128, low),
                       DAG.getNode(ISD::BITCAST, dl, MVT::i128, rlow));
    rhi = DAG.getNode(ISD::OR, dl, MVT::i128,
                      DAG.getNode(ISD::BITCAST, dl, MVT::i128, hi),
                      DAG.getNode(ISD::BITCAST, dl, MVT::i128, rhi));

    low = DAG.getStore(the_chain, dl, rlow, basePtr,
                       lowMemPtr,
                       SN->isVolatile(), SN->isNonTemporal(), 16);
    hi = DAG.getStore(the_chain, dl, rhi,
                      DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
                                  DAG.getConstant(16, PtrVT)),
                      highMemPtr,
                      SN->isVolatile(), SN->isNonTemporal(), 16);
    result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(0),
                         hi.getValue(0));
  }

  return result;
}

//! Generate the address of a constant pool entry.
static SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  const Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDValue with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  llvm_unreachable("LowerConstantPool: Relocation model other than static"
                   " not supported.");
  return SDValue();
}

//! Alternate entry point for generating the address of a constant pool entry
SDValue
SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG,
                       const SPUTargetMachine &TM) {
  return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
}

static SDValue
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  llvm_unreachable("LowerJumpTable: Relocation model other than static"
                   " not supported.");
  return SDValue();
}

static SDValue
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  const GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
                                          PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDValue Zero = DAG.getConstant(0, PtrVT);
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  } else {
    report_fatal_error("LowerGlobalAddress: Relocation model other than static"
                       " not supported.");
    /*NOTREACHED*/
  }

  return SDValue();
}

//! Custom lower double precision floating point constants
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (VT == MVT::f64) {
    ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());

    assert((FP != 0) &&
           "LowerConstantFP: Node is not ConstantFPSDNode");

    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    SDValue T = DAG.getConstant(dbits, MVT::i64);
    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                       DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Tvec));
  }

  return SDValue();
}

SDValue
SPUTargetLowering::LowerFormalArguments(SDValue Chain,
                                        CallingConv::ID CallConv,
                                        bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg>
                                          &Ins,
                                        DebugLoc dl, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals)
                                          const {

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>();

  unsigned ArgOffset = SPUFrameLowering::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameLowering::stackSlotSize();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  // FIXME: allow for other calling conventions
  CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU);

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    EVT ObjectVT = Ins[ArgNo].VT;
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    SDValue ArgVal;
    CCValAssign &VA = ArgLocs[ArgNo];

    if (VA.isRegLoc()) {
      const TargetRegisterClass *ArgRegClass;

      switch (ObjectVT.getSimpleVT().SimpleTy) {
      default:
        report_fatal_error("LowerFormalArguments Unhandled argument type: " +
                           Twine(ObjectVT.getEVTString()));
      case MVT::i8:
        ArgRegClass = &SPU::R8CRegClass;
        break;
      case MVT::i16:
        ArgRegClass = &SPU::R16CRegClass;
        break;
      case MVT::i32:
        ArgRegClass = &SPU::R32CRegClass;
        break;
      case MVT::i64:
        ArgRegClass = &SPU::R64CRegClass;
        break;
      case MVT::i128:
        ArgRegClass = &SPU::GPRCRegClass;
        break;
      case MVT::f32:
        ArgRegClass = &SPU::R32FPRegClass;
        break;
      case MVT::f64:
        ArgRegClass = &SPU::R64FPRegClass;
        break;
      case MVT::v2f64:
      case MVT::v4f32:
      case MVT::v2i64:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        ArgRegClass = &SPU::VECREGRegClass;
        break;
      }

      unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
      RegInfo.addLiveIn(VA.getLocReg(), VReg);
      ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
      ++ArgRegIdx;
    } else {
      // We need to load the argument to a virtual register if we determined
      // above that we ran out of physical registers of the appropriate type
      // or we're forced to do vararg
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
                           false, false, 0);
      ArgOffset += StackSlotSize;
    }

    InVals.push_back(ArgVal);
    // Update the chain
    Chain = ArgVal.getOperand(0);
  }

  // vararg handling:
  if (isVarArg) {
    // FIXME: we should be able to query the argument registers from
    //        tablegen generated code.
    static const unsigned ArgRegs[] = {
      SPU::R3,  SPU::R4,  SPU::R5,  SPU::R6,  SPU::R7,  SPU::R8,  SPU::R9,
      SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
      SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23,
      SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30,
      SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37,
      SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44,
      SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51,
      SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58,
      SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65,
      SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72,
      SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79
    };
    // size of the ArgRegs array (R3-R79: 77 registers)
    const unsigned NumArgRegs = sizeof(ArgRegs) / sizeof(ArgRegs[0]);

    // We will spill (79-3)+1 registers to the stack
    SmallVector<SDValue, 79-3+1> MemOps;

    // Create the frame slot
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      FuncInfo->setVarArgsFrameIndex(
        MFI->CreateFixedObject(StackSlotSize, ArgOffset, true));
      SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
      unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::VECREGRegClass);
      SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
      SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, MachinePointerInfo(),
                                   false, false, 0);
      Chain = Store.getOperand(0);
      MemOps.push_back(Store);

      // Increment address by stack slot size for the next stored argument
      ArgOffset += StackSlotSize;
    }
    if (!MemOps.empty())
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                          &MemOps[0], MemOps.size());
  }

  return Chain;
}

/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;  // Top 14 bits have to be sext of immediate.
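  // In other words (assuming a 32-bit int), the address must be word
  // aligned and fit in an 18-bit signed immediate; the word address
  // returned below then fits in 16 bits.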

  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
}

SDValue
SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                             CallingConv::ID CallConv, bool isVarArg,
                             bool &isTailCall,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<SDValue> &OutVals,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             DebugLoc dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) const {
  // CellSPU target does not yet support tail call optimization.
  isTailCall = false;

  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
  unsigned NumOps = Outs.size();
  unsigned StackSlotSize = SPUFrameLowering::stackSlotSize();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  // FIXME: allow for other calling conventions
  CCInfo.AnalyzeCallOperands(Outs, CCC_SPU);

  const unsigned NumArgRegs = ArgLocs.size();

  // Handy pointer type
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameLowering::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;

  for (; ArgRegIdx != NumOps; ++ArgRegIdx) {
    SDValue Arg = OutVals[ArgRegIdx];
    CCValAssign &VA = ArgLocs[ArgRegIdx];

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType().getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
    case MVT::f32:
    case MVT::f64:
    case MVT::v2i64:
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
                                           MachinePointerInfo(),
                                           false, false, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP].
  unsigned NumStackBytes = ArgOffset - SPUFrameLowering::minStackSize();

  // Insert a call sequence start
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
                                                            true));

  if (!MemOpChains.empty()) {
    // Chain together the stores of any stack-passed arguments.
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = G->getGlobal();
    EVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue GA = DAG.getTargetGlobalAddress(GV, dl, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls, otherwise, external symbols are BRASL calls. This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.
      //
      // NOTE:
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with X-form
      // address pairs:
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    EVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
                                                 Callee.getValueType());

    if (!ST->usingLargeMem()) {
      Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
    } else {
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
    }
  } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDValue(Dest, 0);
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Glue),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  // If the function returns void, just return the chain.
  if (Ins.empty())
    return Chain;

  // Now handle the return value(s)
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                    getTargetMachine(), RVLocs, *DAG.getContext());
  CCRetInfo.AnalyzeCallResult(Ins, CCC_SPU);

  // If the call has results, copy the values out of the ret val registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign VA = RVLocs[i];

    SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
                                     InFlag);
    Chain = Val.getValue(1);
    InFlag = Val.getValue(2);
    InVals.push_back(Val);
  }

  return Chain;
}
1453
1454 SDValue
1455 SPUTargetLowering::LowerReturn(SDValue Chain,
1456 CallingConv::ID CallConv, bool isVarArg,
1457 const SmallVectorImpl<ISD::OutputArg> &Outs,
1458 const SmallVectorImpl<SDValue> &OutVals,
1459 DebugLoc dl, SelectionDAG &DAG) const {
1460
1461 SmallVector<CCValAssign, 16> RVLocs;
1462 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1463 getTargetMachine(), RVLocs, *DAG.getContext());
1464 CCInfo.AnalyzeReturn(Outs, RetCC_SPU);
1465
1466 // If this is the first return lowered for this function, add the regs to the
1467 // liveout set for the function.
1468 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1469 for (unsigned i = 0; i != RVLocs.size(); ++i)
1470 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1471 }
1472
1473 SDValue Flag;
1474
1475 // Copy the result values into the output registers.
1476 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1477 CCValAssign &VA = RVLocs[i];
1478 assert(VA.isRegLoc() && "Can only return in registers!");
1479 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
1480 OutVals[i], Flag);
1481 Flag = Chain.getValue(1);
1482 }
1483
1484 if (Flag.getNode())
1485 return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
1486 else
1487 return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
1488 }
1489
1490
1491 //===----------------------------------------------------------------------===//
1492 // Vector related lowering:
1493 //===----------------------------------------------------------------------===//
1494
1495 static ConstantSDNode *
1496 getVecImm(SDNode *N) {
1497 SDValue OpVal(0, 0);
1498
1499 // Check to see if this buildvec has a single non-undef value in its elements.
1500 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1501 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1502 if (OpVal.getNode() == 0)
1503 OpVal = N->getOperand(i);
1504 else if (OpVal != N->getOperand(i))
1505 return 0;
1506 }
1507
1508 if (OpVal.getNode() != 0) {
1509 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1510 return CN;
1511 }
1512 }
1513
1514 return 0;
1515 }
1516
1517 /// get_vec_u18imm - Test if this vector is a vector filled with the same value
1518 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1519 /// constant
1520 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1521 EVT ValueType) {
1522 if (ConstantSDNode *CN = getVecImm(N)) {
1523 uint64_t Value = CN->getZExtValue();
1524 if (ValueType == MVT::i64) {
1525 uint64_t UValue = CN->getZExtValue();
1526 uint32_t upper = uint32_t(UValue >> 32);
1527 uint32_t lower = uint32_t(UValue);
1528 if (upper != lower)
1529 return SDValue();
1530 Value = Value >> 32;
1531 }
1532 if (Value <= 0x3ffff)
1533 return DAG.getTargetConstant(Value, ValueType);
1534 }
1535
1536 return SDValue();
1537 }
1538
1539 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1540 /// and the value fits into a signed 16-bit constant, and if so, return the
1541 /// constant
1542 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1543 EVT ValueType) {
1544 if (ConstantSDNode *CN = getVecImm(N)) {
1545 int64_t Value = CN->getSExtValue();
1546 if (ValueType == MVT::i64) {
1547 uint64_t UValue = CN->getZExtValue();
1548 uint32_t upper = uint32_t(UValue >> 32);
1549 uint32_t lower = uint32_t(UValue);
1550 if (upper != lower)
1551 return SDValue();
1552 Value = Value >> 32;
1553 }
1554 if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1555 return DAG.getTargetConstant(Value, ValueType);
1556 }
1557 }
1558
1559 return SDValue();
1560 }
1561
1562 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1563 /// and the value fits into a signed 10-bit constant, and if so, return the
1564 /// constant
1565 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1566 EVT ValueType) {
1567 if (ConstantSDNode *CN = getVecImm(N)) {
1568 int64_t Value = CN->getSExtValue();
1569 if (ValueType == MVT::i64) {
1570 uint64_t UValue = CN->getZExtValue();
1571 uint32_t upper = uint32_t(UValue >> 32);
1572 uint32_t lower = uint32_t(UValue);
1573 if (upper != lower)
1574 return SDValue();
1575 Value = Value >> 32;
1576 }
1577 if (isInt<10>(Value))
1578 return DAG.getTargetConstant(Value, ValueType);
1579 }
1580
1581 return SDValue();
1582 }
1583
1584 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1585 /// and the value fits into a signed 8-bit constant, and if so, return the
1586 /// constant.
1587 ///
1588 /// @note: The incoming vector is v16i8 because that's the only way we can load
1589 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1590 /// same value.
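///
/// For illustration (hypothetical splat value, not from the surrounding
/// code): a v16i8 splat of 0x2a is seen here as the 16-bit pattern 0x2a2a,
/// so the upper/lower byte test passes and 0x2a is returned.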
1591 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1592 EVT ValueType) {
1593 if (ConstantSDNode *CN = getVecImm(N)) {
1594 int Value = (int) CN->getZExtValue();
1595 if (ValueType == MVT::i16
1596 && Value <= 0xffff /* truncated from uint64_t */
1597 && ((short) Value >> 8) == ((short) Value & 0xff))
1598 return DAG.getTargetConstant(Value & 0xff, ValueType);
1599 else if (ValueType == MVT::i8
1600 && (Value & 0xff) == Value)
1601 return DAG.getTargetConstant(Value, ValueType);
1602 }
1603
1604 return SDValue();
1605 }
1606
1607 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1608 /// and the value fits into a signed 16-bit constant, and if so, return the
1609 /// constant
1610 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1611 EVT ValueType) {
1612 if (ConstantSDNode *CN = getVecImm(N)) {
1613 uint64_t Value = CN->getZExtValue();
1614 if ((ValueType == MVT::i32
1615 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1616 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1617 return DAG.getTargetConstant(Value >> 16, ValueType);
1618 }
1619
1620 return SDValue();
1621 }
1622
1623 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1624 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1625 if (ConstantSDNode *CN = getVecImm(N)) {
1626 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1627 }
1628
1629 return SDValue();
1630 }
1631
1632 /// get_v2i64_imm - Catch-all for general 64-bit constant vectors
1633 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1634 if (ConstantSDNode *CN = getVecImm(N)) {
1635 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
1636 }
1637
1638 return SDValue();
1639 }
1640
1641 //! Lower a BUILD_VECTOR instruction creatively:
1642 static SDValue
1643 LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1644 EVT VT = Op.getValueType();
1645 EVT EltVT = VT.getVectorElementType();
1646 DebugLoc dl = Op.getDebugLoc();
1647 BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
1648 assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
1649 unsigned minSplatBits = EltVT.getSizeInBits();
1650
1651 if (minSplatBits < 16)
1652 minSplatBits = 16;
1653
1654 APInt APSplatBits, APSplatUndef;
1655 unsigned SplatBitSize;
1656 bool HasAnyUndefs;
1657
1658 if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
1659 HasAnyUndefs, minSplatBits)
1660 || minSplatBits < SplatBitSize)
1661 return SDValue(); // Wasn't a constant vector or splat exceeded min
1662
1663 uint64_t SplatBits = APSplatBits.getZExtValue();
1664
1665 switch (VT.getSimpleVT().SimpleTy) {
1666 default:
1667 report_fatal_error("CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " +
1668 Twine(VT.getEVTString()));
1669 /*NOTREACHED*/
1670 case MVT::v4f32: {
1671 uint32_t Value32 = uint32_t(SplatBits);
1672 assert(SplatBitSize == 32
1673 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1674 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1675 SDValue T = DAG.getConstant(Value32, MVT::i32);
1676 return DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,
1677 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
1678 break;
1679 }
1680 case MVT::v2f64: {
1681 uint64_t f64val = uint64_t(SplatBits);
1682 assert(SplatBitSize == 64
1683 && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1684 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1685 SDValue T = DAG.getConstant(f64val, MVT::i64);
1686 return DAG.getNode(ISD::BITCAST, dl, MVT::v2f64,
1687 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
1688 break;
1689 }
1690 case MVT::v16i8: {
1691 // 8-bit constants have to be expanded to 16-bits
1692 unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
1693 SmallVector<SDValue, 8> Ops;
1694
1695 Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
1696 return DAG.getNode(ISD::BITCAST, dl, VT,
1697 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
1698 }
1699 case MVT::v8i16: {
1700 unsigned short Value16 = SplatBits;
1701 SDValue T = DAG.getConstant(Value16, EltVT);
1702 SmallVector<SDValue, 8> Ops;
1703
1704 Ops.assign(8, T);
1705 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
1706 }
1707 case MVT::v4i32: {
1708 SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1709 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
1710 }
1711 case MVT::v2i64: {
1712 return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
1713 }
1714 }
1715
1716 return SDValue();
1717 }
1718
1719 //! Build a v2i64 vector with SplatVal splatted into both elements.
1721 SDValue
1722 SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
1723 DebugLoc dl) {
1724 uint32_t upper = uint32_t(SplatVal >> 32);
1725 uint32_t lower = uint32_t(SplatVal);
1726
1727 if (upper == lower) {
1728     // Magic constant that can be matched by IL, ILA, et al.
1729 SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
1730 return DAG.getNode(ISD::BITCAST, dl, OpVT,
1731 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1732 Val, Val, Val, Val));
1733 } else {
1734 bool upper_special, lower_special;
1735
1736 // NOTE: This code creates common-case shuffle masks that can be easily
1737 // detected as common expressions. It is not attempting to create highly
1738 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1739
1740 // Detect if the upper or lower half is a special shuffle mask pattern:
1741 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1742 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
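    // For reference (assuming the usual SPU shufb control-byte encoding):
    // a control byte of 0x80 produces a 0x00 result byte, 0xc0 produces
    // 0xff, and 0xe0 produces 0x80, which is why the mask-building loop
    // below emits exactly those three codes for the special halves.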
1743
1744 // Both upper and lower are special, lower to a constant pool load:
1745 if (lower_special && upper_special) {
1746 SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
1747 return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
1748 SplatValCN, SplatValCN);
1749 }
1750
1751 SDValue LO32;
1752 SDValue HI32;
1753 SmallVector<SDValue, 16> ShufBytes;
1754 SDValue Result;
1755
1756 // Create lower vector if not a special pattern
1757 if (!lower_special) {
1758 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1759 LO32 = DAG.getNode(ISD::BITCAST, dl, OpVT,
1760 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1761 LO32C, LO32C, LO32C, LO32C));
1762 }
1763
1764 // Create upper vector if not a special pattern
1765 if (!upper_special) {
1766 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1767 HI32 = DAG.getNode(ISD::BITCAST, dl, OpVT,
1768 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1769 HI32C, HI32C, HI32C, HI32C));
1770 }
1771
1772 // If either upper or lower are special, then the two input operands are
1773 // the same (basically, one of them is a "don't care")
1774 if (lower_special)
1775 LO32 = HI32;
1776 if (upper_special)
1777 HI32 = LO32;
1778
1779 for (int i = 0; i < 4; ++i) {
1780 uint64_t val = 0;
1781 for (int j = 0; j < 4; ++j) {
1782 SDValue V;
1783 bool process_upper, process_lower;
1784 val <<= 8;
1785 process_upper = (upper_special && (i & 1) == 0);
1786 process_lower = (lower_special && (i & 1) == 1);
1787
1788 if (process_upper || process_lower) {
1789 if ((process_upper && upper == 0)
1790 || (process_lower && lower == 0))
1791 val |= 0x80;
1792 else if ((process_upper && upper == 0xffffffff)
1793 || (process_lower && lower == 0xffffffff))
1794 val |= 0xc0;
1795 else if ((process_upper && upper == 0x80000000)
1796 || (process_lower && lower == 0x80000000))
1797 val |= (j == 0 ? 0xe0 : 0x80);
1798 } else
1799 val |= i * 4 + j + ((i & 1) * 16);
1800 }
1801
1802 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1803 }
1804
1805 return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
1806 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1807 &ShufBytes[0], ShufBytes.size()));
1808 }
1809 }
1810
1811 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1812 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1813 /// permutation vector, V3, is monotonically increasing with one "exception"
1814 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1815 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1816 /// In either case, the net result is going to eventually invoke SHUFB to
1817 /// permute/shuffle the bytes from V1 and V2.
1818 /// \note
1819 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, which
1820 /// generate a control word for byte/halfword/word insertion. This takes care
1821 /// of a single element move from V2 into V1.
1822 /// \note
1823 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
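///
/// For example (illustrative masks, not from a specific test case): a v4i32
/// shuffle mask (0, 5, 2, 3) is monotonic with exactly one element taken from
/// V2 (slot 1), so it lowers to a SHUFFLE_MASK at byte offset 4 fed into
/// SHUFB; a mask (1, 2, 3, 0) is a pure rotation and lowers to a
/// ROTBYTES_LEFT of 4 bytes.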
1824 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1825 const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
1826 SDValue V1 = Op.getOperand(0);
1827 SDValue V2 = Op.getOperand(1);
1828 DebugLoc dl = Op.getDebugLoc();
1829
1830 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1831
1832 // If we have a single element being moved from V1 to V2, this can be handled
1833 // using the C*[DX] compute mask instructions, but the vector elements have
1834 // to be monotonically increasing with one exception element, and the source
1835 // slot of the element to move must be the same as the destination.
1836 EVT VecVT = V1.getValueType();
1837 EVT EltVT = VecVT.getVectorElementType();
1838 unsigned EltsFromV2 = 0;
1839 unsigned V2EltOffset = 0;
1840 unsigned V2EltIdx0 = 0;
1841 unsigned CurrElt = 0;
1842 unsigned MaxElts = VecVT.getVectorNumElements();
1843 unsigned PrevElt = 0;
1844 bool monotonic = true;
1845 bool rotate = true;
1846 int rotamt=0;
1847 EVT maskVT; // which of the c?d instructions to use
1848
1849 if (EltVT == MVT::i8) {
1850 V2EltIdx0 = 16;
1851 maskVT = MVT::v16i8;
1852 } else if (EltVT == MVT::i16) {
1853 V2EltIdx0 = 8;
1854 maskVT = MVT::v8i16;
1855 } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1856 V2EltIdx0 = 4;
1857 maskVT = MVT::v4i32;
1858 } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1859 V2EltIdx0 = 2;
1860 maskVT = MVT::v2i64;
1861 } else
1862 llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
1863
1864 for (unsigned i = 0; i != MaxElts; ++i) {
1865 if (SVN->getMaskElt(i) < 0)
1866 continue;
1867
1868 unsigned SrcElt = SVN->getMaskElt(i);
1869
1870 if (monotonic) {
1871 if (SrcElt >= V2EltIdx0) {
1872 // TODO: optimize for the monotonic case when several consecutive
1873         // elements are taken from V2. Do we ever get such a case?
1874 if (EltsFromV2 == 0 && CurrElt == (SrcElt - V2EltIdx0))
1875 V2EltOffset = (SrcElt - V2EltIdx0) * (EltVT.getSizeInBits()/8);
1876 else
1877 monotonic = false;
1878 ++EltsFromV2;
1879 } else if (CurrElt != SrcElt) {
1880 monotonic = false;
1881 }
1882
1883 ++CurrElt;
1884 }
1885
1886 if (rotate) {
1887 if (PrevElt > 0 && SrcElt < MaxElts) {
1888 if ((PrevElt == SrcElt - 1)
1889 || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1890 PrevElt = SrcElt;
1891 } else {
1892 rotate = false;
1893 }
1894 } else if (i == 0 || (PrevElt==0 && SrcElt==1)) {
1895 // First time or after a "wrap around"
1896 rotamt = SrcElt-i;
1897 PrevElt = SrcElt;
1898 } else {
1899         // This isn't a rotation; it takes elements from vector 2.
1900 rotate = false;
1901 }
1902 }
1903 }
1904
1905 if (EltsFromV2 == 1 && monotonic) {
1906 // Compute mask and shuffle
1907 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1908
1909     // As SHUFFLE_MASK becomes a c?d instruction, feed it an address.
1910     // R1 ($sp) is used here only because its low bits are guaranteed to be zero.
1911 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
1912 DAG.getRegister(SPU::R1, PtrVT),
1913 DAG.getConstant(V2EltOffset, MVT::i32));
1914 SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl,
1915 maskVT, Pointer);
1916
1917 // Use shuffle mask in SHUFB synthetic instruction:
1918 return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
1919 ShufMaskOp);
1920 } else if (rotate) {
1921 if (rotamt < 0)
1922 rotamt +=MaxElts;
1923 rotamt *= EltVT.getSizeInBits()/8;
1924 return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
1925 V1, DAG.getConstant(rotamt, MVT::i16));
1926 } else {
1927 // Convert the SHUFFLE_VECTOR mask's input element units to the
1928 // actual bytes.
1929 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1930
1931 SmallVector<SDValue, 16> ResultMask;
1932 for (unsigned i = 0, e = MaxElts; i != e; ++i) {
1933 unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
1934
1935 for (unsigned j = 0; j < BytesPerElement; ++j)
1936 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
1937 }
1938 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
1939 &ResultMask[0], ResultMask.size());
1940 return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
1941 }
1942 }
1943
1944 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1945 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1946 DebugLoc dl = Op.getDebugLoc();
1947
1948 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1949 // For a constant, build the appropriate constant vector, which will
1950 // eventually simplify to a vector register load.
1951
1952 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1953 SmallVector<SDValue, 16> ConstVecValues;
1954 EVT VT;
1955 size_t n_copies;
1956
1957 // Create a constant vector:
1958 switch (Op.getValueType().getSimpleVT().SimpleTy) {
1959 default: llvm_unreachable("Unexpected constant value type in "
1960 "LowerSCALAR_TO_VECTOR");
1961 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1962 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1963 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1964 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1965 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1966 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1967 }
1968
1969 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1970 for (size_t j = 0; j < n_copies; ++j)
1971 ConstVecValues.push_back(CValue);
1972
1973 return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
1974 &ConstVecValues[0], ConstVecValues.size());
1975 } else {
1976 // Otherwise, copy the value from one register to another:
1977 switch (Op0.getValueType().getSimpleVT().SimpleTy) {
1978 default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
1979 case MVT::i8:
1980 case MVT::i16:
1981 case MVT::i32:
1982 case MVT::i64:
1983 case MVT::f32:
1984 case MVT::f64:
1985 return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
1986 }
1987 }
1988
1989 return SDValue();
1990 }
1991
1992 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1993 EVT VT = Op.getValueType();
1994 SDValue N = Op.getOperand(0);
1995 SDValue Elt = Op.getOperand(1);
1996 DebugLoc dl = Op.getDebugLoc();
1997 SDValue retval;
1998
1999 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
2000 // Constant argument:
2001 int EltNo = (int) C->getZExtValue();
2002
2003 // sanity checks:
2004 if (VT == MVT::i8 && EltNo >= 16)
2005 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2006 else if (VT == MVT::i16 && EltNo >= 8)
2007 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2008 else if (VT == MVT::i32 && EltNo >= 4)
2009       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
2010     else if (VT == MVT::i64 && EltNo >= 2)
2011       llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
2012
2013 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2014 // i32 and i64: Element 0 is the preferred slot
2015 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
2016 }
2017
2018 // Need to generate shuffle mask and extract:
2019 int prefslot_begin = -1, prefslot_end = -1;
2020 int elt_byte = EltNo * VT.getSizeInBits() / 8;
2021
2022 switch (VT.getSimpleVT().SimpleTy) {
2023 default:
2024 assert(false && "Invalid value type!");
2025 case MVT::i8: {
2026 prefslot_begin = prefslot_end = 3;
2027 break;
2028 }
2029 case MVT::i16: {
2030 prefslot_begin = 2; prefslot_end = 3;
2031 break;
2032 }
2033 case MVT::i32:
2034 case MVT::f32: {
2035 prefslot_begin = 0; prefslot_end = 3;
2036 break;
2037 }
2038 case MVT::i64:
2039 case MVT::f64: {
2040 prefslot_begin = 0; prefslot_end = 7;
2041 break;
2042 }
2043 }
2044
2045 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2046 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2047
2048 unsigned int ShufBytes[16] = {
2049 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
2050 };
2051 for (int i = 0; i < 16; ++i) {
2052       // Zero-fill the upper part of the preferred slot; don't care about the
2053       // other slots:
2054 unsigned int mask_val;
2055 if (i <= prefslot_end) {
2056 mask_val =
2057 ((i < prefslot_begin)
2058 ? 0x80
2059 : elt_byte + (i - prefslot_begin));
2060
2061 ShufBytes[i] = mask_val;
2062 } else
2063 ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
2064 }
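    // Worked example (illustrative): extracting i16 element 5 gives
    // elt_byte = 10 with preferred slot bytes [2,3], so the repeating
    // pattern is { 0x80, 0x80, 0x0a, 0x0b }: zero the bytes above the
    // preferred slot and move source bytes 10-11 into it.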
2065
2066 SDValue ShufMask[4];
2067 for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
2068 unsigned bidx = i * 4;
2069 unsigned int bits = ((ShufBytes[bidx] << 24) |
2070 (ShufBytes[bidx+1] << 16) |
2071 (ShufBytes[bidx+2] << 8) |
2072 ShufBytes[bidx+3]);
2073 ShufMask[i] = DAG.getConstant(bits, MVT::i32);
2074 }
2075
2076 SDValue ShufMaskVec =
2077 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2078 &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
2079
2080 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2081 DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
2082 N, N, ShufMaskVec));
2083 } else {
2084 // Variable index: Rotate the requested element into slot 0, then replicate
2085 // slot 0 across the vector
2086 EVT VecVT = N.getValueType();
2087 if (!VecVT.isSimple() || !VecVT.isVector()) {
2088       report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit "
2089                         "vector type!");
2090 }
2091
2092 // Make life easier by making sure the index is zero-extended to i32
2093 if (Elt.getValueType() != MVT::i32)
2094 Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
2095
2096 // Scale the index to a bit/byte shift quantity
2097 APInt scaleFactor =
2098 APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2099 unsigned scaleShift = scaleFactor.logBase2();
2100 SDValue vecShift;
2101
2102 if (scaleShift > 0) {
2103 // Scale the shift factor:
2104 Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
2105 DAG.getConstant(scaleShift, MVT::i32));
2106 }
2107
2108 vecShift = DAG.getNode(SPUISD::SHL_BYTES, dl, VecVT, N, Elt);
2109
2110 // Replicate the bytes starting at byte 0 across the entire vector (for
2111 // consistency with the notion of a unified register set)
2112 SDValue replicate;
2113
2114 switch (VT.getSimpleVT().SimpleTy) {
2115 default:
2116       report_fatal_error("LowerEXTRACT_VECTOR_ELT(variable): Unhandled vector "
2117                         "type");
2118 /*NOTREACHED*/
2119 case MVT::i8: {
2120 SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2121 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2122 factor, factor, factor, factor);
2123 break;
2124 }
2125 case MVT::i16: {
2126 SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2127 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2128 factor, factor, factor, factor);
2129 break;
2130 }
2131 case MVT::i32:
2132 case MVT::f32: {
2133 SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2134 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2135 factor, factor, factor, factor);
2136 break;
2137 }
2138 case MVT::i64:
2139 case MVT::f64: {
2140 SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2141 SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2142 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2143 loFactor, hiFactor, loFactor, hiFactor);
2144 break;
2145 }
2146 }
2147
2148 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2149 DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2150 vecShift, vecShift, replicate));
2151 }
2152
2153 return retval;
2154 }
2155
2156 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2157 SDValue VecOp = Op.getOperand(0);
2158 SDValue ValOp = Op.getOperand(1);
2159 SDValue IdxOp = Op.getOperand(2);
2160 DebugLoc dl = Op.getDebugLoc();
2161 EVT VT = Op.getValueType();
2162 EVT eltVT = ValOp.getValueType();
2163
2164   // Use 0 when the lane to insert into is 'undef'.
2165 int64_t Offset=0;
2166 if (IdxOp.getOpcode() != ISD::UNDEF) {
2167 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2168 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2169 Offset = (CN->getSExtValue()) * eltVT.getSizeInBits()/8;
2170 }
2171
2172 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2173 // Use $sp ($1) because it's always 16-byte aligned and it's available:
2174 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
2175 DAG.getRegister(SPU::R1, PtrVT),
2176 DAG.getConstant(Offset, PtrVT));
2177 // widen the mask when dealing with half vectors
2178 EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(),
2179 128/ VT.getVectorElementType().getSizeInBits());
2180 SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer);
2181
2182 SDValue result =
2183 DAG.getNode(SPUISD::SHUFB, dl, VT,
2184 DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
2185 VecOp,
2186 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ShufMask));
2187
2188 return result;
2189 }
2190
2191 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2192 const TargetLowering &TLI)
2193 {
2194 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2195 DebugLoc dl = Op.getDebugLoc();
2196 EVT ShiftVT = TLI.getShiftAmountTy(N0.getValueType());
2197
2198 assert(Op.getValueType() == MVT::i8);
2199 switch (Opc) {
2200 default:
2201 llvm_unreachable("Unhandled i8 math operator");
2202 /*NOTREACHED*/
2203 break;
2204 case ISD::ADD: {
2205 // 8-bit addition: Promote the arguments up to 16-bits and truncate
2206 // the result:
2207 SDValue N1 = Op.getOperand(1);
2208 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2209 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2210 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2211 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2212
2213 }
2214
2215 case ISD::SUB: {
2216 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2217 // the result:
2218 SDValue N1 = Op.getOperand(1);
2219 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2220 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2221 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2222 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2223 }
2224 case ISD::ROTR:
2225 case ISD::ROTL: {
2226 SDValue N1 = Op.getOperand(1);
2227 EVT N1VT = N1.getValueType();
2228
2229 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2230 if (!N1VT.bitsEq(ShiftVT)) {
2231 unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
2232 ? ISD::ZERO_EXTEND
2233 : ISD::TRUNCATE;
2234 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2235 }
2236
2237 // Replicate lower 8-bits into upper 8:
2238 SDValue ExpandArg =
2239 DAG.getNode(ISD::OR, dl, MVT::i16, N0,
2240 DAG.getNode(ISD::SHL, dl, MVT::i16,
2241 N0, DAG.getConstant(8, MVT::i32)));
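    // For example (illustrative): an i8 value 0xab expands to 0xabab, so a
    // 16-bit rotate by 3 yields 0x5d5d, and the truncated low byte 0x5d is
    // exactly the 8-bit rotate result.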
2242
2243 // Truncate back down to i8
2244 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2245 DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
2246 }
2247 case ISD::SRL:
2248 case ISD::SHL: {
2249 SDValue N1 = Op.getOperand(1);
2250 EVT N1VT = N1.getValueType();
2251
2252 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2253 if (!N1VT.bitsEq(ShiftVT)) {
2254 unsigned N1Opc = ISD::ZERO_EXTEND;
2255
2256 if (N1.getValueType().bitsGT(ShiftVT))
2257 N1Opc = ISD::TRUNCATE;
2258
2259 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2260 }
2261
2262 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2263 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2264 }
2265 case ISD::SRA: {
2266 SDValue N1 = Op.getOperand(1);
2267 EVT N1VT = N1.getValueType();
2268
2269 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2270 if (!N1VT.bitsEq(ShiftVT)) {
2271 unsigned N1Opc = ISD::SIGN_EXTEND;
2272
2273 if (N1VT.bitsGT(ShiftVT))
2274 N1Opc = ISD::TRUNCATE;
2275 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2276 }
2277
2278 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2279 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2280 }
2281 case ISD::MUL: {
2282 SDValue N1 = Op.getOperand(1);
2283
2284 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2285 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2286 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2287 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2288 break;
2289 }
2290 }
2291
2292 return SDValue();
2293 }
2294
2295 //! Lower byte immediate operations for v16i8 vectors:
2296 static SDValue
2297 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2298 SDValue ConstVec;
2299 SDValue Arg;
2300 EVT VT = Op.getValueType();
2301 DebugLoc dl = Op.getDebugLoc();
2302
2303 ConstVec = Op.getOperand(0);
2304 Arg = Op.getOperand(1);
2305 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2306 if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) {
2307 ConstVec = ConstVec.getOperand(0);
2308 } else {
2309 ConstVec = Op.getOperand(1);
2310 Arg = Op.getOperand(0);
2311 if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) {
2312 ConstVec = ConstVec.getOperand(0);
2313 }
2314 }
2315 }
2316
2317 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2318 BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
2319 assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
2320
2321 APInt APSplatBits, APSplatUndef;
2322 unsigned SplatBitSize;
2323 bool HasAnyUndefs;
2324 unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
2325
2326 if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
2327 HasAnyUndefs, minSplatBits)
2328 && minSplatBits <= SplatBitSize) {
2329 uint64_t SplatBits = APSplatBits.getZExtValue();
2330 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2331
2332 SmallVector<SDValue, 16> tcVec;
2333 tcVec.assign(16, tc);
2334 return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
2335 DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
2336 }
2337 }
2338
2339 // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2340 // lowered. Return the operation, rather than a null SDValue.
2341 return Op;
2342 }
2343
2344 //! Custom lowering for CTPOP (count population)
2345 /*!
2346   Custom lowering code that counts the number of ones in the input
2347   operand. SPU has such an instruction, but it counts the number of
2348   ones per byte; the per-byte counts then have to be accumulated.
2349 */
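// Sketch of the i16 accumulation below (illustrative numbers): for an input
// of 0xff0f, CNTB produces the per-byte counts 0x0804, and
// ((0x0804 >> 8) + 0x0804) & 0x0f = 0x0c = 12, the full population count.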
2350 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2351 EVT VT = Op.getValueType();
2352 EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
2353 VT, (128 / VT.getSizeInBits()));
2354 DebugLoc dl = Op.getDebugLoc();
2355
2356 switch (VT.getSimpleVT().SimpleTy) {
2357 default:
2358 assert(false && "Invalid value type!");
2359 case MVT::i8: {
2360 SDValue N = Op.getOperand(0);
2361 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2362
2363 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2364 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2365
2366 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
2367 }
2368
2369 case MVT::i16: {
2370 MachineFunction &MF = DAG.getMachineFunction();
2371 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2372
2373 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2374
2375 SDValue N = Op.getOperand(0);
2376 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2377 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2378 SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2379
2380 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2381 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2382
2383 // CNTB_result becomes the chain to which all of the virtual registers
2384 // CNTB_reg, SUM1_reg become associated:
2385 SDValue CNTB_result =
2386 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
2387
2388 SDValue CNTB_rescopy =
2389 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2390
2391 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
2392
2393 return DAG.getNode(ISD::AND, dl, MVT::i16,
2394 DAG.getNode(ISD::ADD, dl, MVT::i16,
2395 DAG.getNode(ISD::SRL, dl, MVT::i16,
2396 Tmp1, Shift1),
2397 Tmp1),
2398 Mask0);
2399 }
2400
2401 case MVT::i32: {
2402 MachineFunction &MF = DAG.getMachineFunction();
2403 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2404
2405 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2406 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2407
2408 SDValue N = Op.getOperand(0);
2409 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2410 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2411 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2412 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2413
2414 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2415 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2416
2417 // CNTB_result becomes the chain to which all of the virtual registers
2418 // CNTB_reg, SUM1_reg become associated:
2419 SDValue CNTB_result =
2420 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
2421
2422 SDValue CNTB_rescopy =
2423 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2424
2425 SDValue Comp1 =
2426 DAG.getNode(ISD::SRL, dl, MVT::i32,
2427 DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
2428 Shift1);
2429
2430 SDValue Sum1 =
2431 DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
2432 DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
2433
2434 SDValue Sum1_rescopy =
2435 DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
2436
2437 SDValue Comp2 =
2438 DAG.getNode(ISD::SRL, dl, MVT::i32,
2439 DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
2440 Shift2);
2441 SDValue Sum2 =
2442 DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
2443 DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
2444
2445 return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
2446 }
2447
2448 case MVT::i64:
2449 break;
2450 }
2451
2452 return SDValue();
2453 }
2454
2455 //! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
2456 /*!
2457 f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
2458 All conversions to i64 are expanded to a libcall.
2459 */
2460 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2461 const SPUTargetLowering &TLI) {
2462 EVT OpVT = Op.getValueType();
2463 SDValue Op0 = Op.getOperand(0);
2464 EVT Op0VT = Op0.getValueType();
2465
2466 if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
2467 || OpVT == MVT::i64) {
2468 // Convert f32 / f64 to i32 / i64 via libcall.
2469 RTLIB::Libcall LC =
2470 (Op.getOpcode() == ISD::FP_TO_SINT)
2471 ? RTLIB::getFPTOSINT(Op0VT, OpVT)
2472 : RTLIB::getFPTOUINT(Op0VT, OpVT);
2473     assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-int conversion!");
2474 SDValue Dummy;
2475 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2476 }
2477
2478 return Op;
2479 }
2480
2481 //! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
2482 /*!
2483 i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
2484 All conversions from i64 are expanded to a libcall.
2485 */
2486 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2487 const SPUTargetLowering &TLI) {
2488 EVT OpVT = Op.getValueType();
2489 SDValue Op0 = Op.getOperand(0);
2490 EVT Op0VT = Op0.getValueType();
2491
2492 if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
2493 || Op0VT == MVT::i64) {
2494 // Convert i32, i64 to f64 via libcall:
2495 RTLIB::Libcall LC =
2496 (Op.getOpcode() == ISD::SINT_TO_FP)
2497 ? RTLIB::getSINTTOFP(Op0VT, OpVT)
2498 : RTLIB::getUINTTOFP(Op0VT, OpVT);
2499     assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected int-to-fp conversion!");
2500 SDValue Dummy;
2501 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2502 }
2503
2504 return Op;
2505 }
2506
2507 //! Lower ISD::SETCC
2508 /*!
2509 This handles MVT::f64 (double floating point) condition lowering
2510 */
2511 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
2512 const TargetLowering &TLI) {
2513 CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
2514 DebugLoc dl = Op.getDebugLoc();
2515 assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2516
2517 SDValue lhs = Op.getOperand(0);
2518 SDValue rhs = Op.getOperand(1);
2519 EVT lhsVT = lhs.getValueType();
2520   assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::f64\n");
2521
2522 EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
2523 APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2524 EVT IntVT(MVT::i64);
2525
2526 // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
2527 // selected to a NOP:
2528 SDValue i64lhs = DAG.getNode(ISD::BITCAST, dl, IntVT, lhs);
2529 SDValue lhsHi32 =
2530 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2531 DAG.getNode(ISD::SRL, dl, IntVT,
2532 i64lhs, DAG.getConstant(32, MVT::i32)));
2533 SDValue lhsHi32abs =
2534 DAG.getNode(ISD::AND, dl, MVT::i32,
2535 lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
2536 SDValue lhsLo32 =
2537 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
2538
2539 // SETO and SETUO only use the lhs operand:
2540 if (CC->get() == ISD::SETO) {
2541 // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
2542 // SETUO
2543 APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2544 return DAG.getNode(ISD::XOR, dl, ccResultVT,
2545 DAG.getSetCC(dl, ccResultVT,
2546 lhs, DAG.getConstantFP(0.0, lhsVT),
2547 ISD::SETUO),
2548 DAG.getConstant(ccResultAllOnes, ccResultVT));
2549 } else if (CC->get() == ISD::SETUO) {
2550 // Evaluates to true if Op0 is [SQ]NaN
2551 return DAG.getNode(ISD::AND, dl, ccResultVT,
2552 DAG.getSetCC(dl, ccResultVT,
2553 lhsHi32abs,
2554 DAG.getConstant(0x7ff00000, MVT::i32),
2555 ISD::SETGE),
2556 DAG.getSetCC(dl, ccResultVT,
2557 lhsLo32,
2558 DAG.getConstant(0, MVT::i32),
2559 ISD::SETGT));
2560 }
2561
2562 SDValue i64rhs = DAG.getNode(ISD::BITCAST, dl, IntVT, rhs);
2563 SDValue rhsHi32 =
2564 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2565 DAG.getNode(ISD::SRL, dl, IntVT,
2566 i64rhs, DAG.getConstant(32, MVT::i32)));
2567
2568 // If a value is negative, subtract from the sign magnitude constant:
2569 SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
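  // Illustrative values: -1.0 has bits 0xBFF0000000000000, and subtracting
  // from 0x8000000000000000 yields the key 0xC010000000000000; -2.0 maps to
  // 0xC000000000000000, so negative doubles order correctly when the keys
  // are compared as signed 64-bit integers.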
2570
2571 // Convert the sign-magnitude representation into 2's complement:
2572 SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2573 lhsHi32, DAG.getConstant(31, MVT::i32));
2574 SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
2575 SDValue lhsSelect =
2576 DAG.getNode(ISD::SELECT, dl, IntVT,
2577 lhsSelectMask, lhsSignMag2TC, i64lhs);
2578
2579 SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2580 rhsHi32, DAG.getConstant(31, MVT::i32));
2581 SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
2582 SDValue rhsSelect =
2583 DAG.getNode(ISD::SELECT, dl, IntVT,
2584 rhsSelectMask, rhsSignMag2TC, i64rhs);
2585
2586 unsigned compareOp;
2587
2588 switch (CC->get()) {
2589 case ISD::SETOEQ:
2590 case ISD::SETUEQ:
2591 compareOp = ISD::SETEQ; break;
2592 case ISD::SETOGT:
2593 case ISD::SETUGT:
2594 compareOp = ISD::SETGT; break;
2595 case ISD::SETOGE:
2596 case ISD::SETUGE:
2597 compareOp = ISD::SETGE; break;
2598 case ISD::SETOLT:
2599 case ISD::SETULT:
2600 compareOp = ISD::SETLT; break;
2601 case ISD::SETOLE:
2602 case ISD::SETULE:
2603 compareOp = ISD::SETLE; break;
2604 case ISD::SETUNE:
2605 case ISD::SETONE:
2606 compareOp = ISD::SETNE; break;
2607 default:
2608 report_fatal_error("CellSPU ISel Select: unimplemented f64 condition");
2609 }
2610
2611 SDValue result =
2612 DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
2613 (ISD::CondCode) compareOp);
2614
2615 if ((CC->get() & 0x8) == 0) {
2616 // Ordered comparison:
2617 SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
2618 lhs, DAG.getConstantFP(0.0, MVT::f64),
2619 ISD::SETO);
2620 SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
2621 rhs, DAG.getConstantFP(0.0, MVT::f64),
2622 ISD::SETO);
2623 SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
2624
2625 result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
2626 }
2627
2628 return result;
2629 }
2630
2631 //! Lower ISD::SELECT_CC
2632 /*!
2633 ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2634 SELB instruction.
2635
2636 \note Need to revisit this in the future: if the code path through the true
2637 and false value computations is longer than the latency of a branch (6
2638 cycles), then it would be more advantageous to branch and insert a new basic
2639 block and branch on the condition. However, this code does not make that
2640   assumption, given the simplistic uses so far.
2641 */
2642
2643 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2644 const TargetLowering &TLI) {
2645 EVT VT = Op.getValueType();
2646 SDValue lhs = Op.getOperand(0);
2647 SDValue rhs = Op.getOperand(1);
2648 SDValue trueval = Op.getOperand(2);
2649 SDValue falseval = Op.getOperand(3);
2650 SDValue condition = Op.getOperand(4);
2651 DebugLoc dl = Op.getDebugLoc();
2652
2653 // NOTE: SELB's arguments: $rA, $rB, $mask
2654 //
2655 // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2656 // where bits in $mask are 1. CCond will be inverted, having 1s where the
2657 // condition was true and 0s where the condition was false. Hence, the
2658 // arguments to SELB get reversed.
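  //
  // In other words (assuming SELB's documented bitwise behavior):
  //   result = ($rA & ~$mask) | ($rB & $mask)
  // e.g., $rA = 0b1010, $rB = 0b0101, $mask = 0b0011 yields 0b1001.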
2659
2660 // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2661 // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2662 // with another "cannot select select_cc" assert:
2663
2664 SDValue compare = DAG.getNode(ISD::SETCC, dl,
2665 TLI.getSetCCResultType(Op.getValueType()),
2666 lhs, rhs, condition);
2667 return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
2668 }
2669
2670 //! Custom lower ISD::TRUNCATE
2671 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2672 {
2673 // Type to truncate to
2674 EVT VT = Op.getValueType();
2675 MVT simpleVT = VT.getSimpleVT();
2676 EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
2677 VT, (128 / VT.getSizeInBits()));
2678 DebugLoc dl = Op.getDebugLoc();
2679
2680 // Type to truncate from
2681 SDValue Op0 = Op.getOperand(0);
2682 EVT Op0VT = Op0.getValueType();
2683
2684 if (Op0VT == MVT::i128 && simpleVT == MVT::i64) {
2685 // Create shuffle mask, least significant doubleword of quadword
2686 unsigned maskHigh = 0x08090a0b;
2687 unsigned maskLow = 0x0c0d0e0f;
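    // (Byte indices 8-15 name the least significant doubleword of the
    // big-endian quadword; repeating them in both mask halves places that
    // doubleword in the preferred slot read by VEC2PREFSLOT below.)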
2688 // Use a shuffle to perform the truncation
2689 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2690 DAG.getConstant(maskHigh, MVT::i32),
2691 DAG.getConstant(maskLow, MVT::i32),
2692 DAG.getConstant(maskHigh, MVT::i32),
2693 DAG.getConstant(maskLow, MVT::i32));
2694
2695 SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2696 Op0, Op0, shufMask);
2697
2698 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
2699 }
2700
2701 return SDValue(); // Leave the truncate unmolested
2702 }
2703
2704 /*!
2705 * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic
2706 * algorithm is to duplicate the sign bit using rotmai to generate at
2707 * least one byte full of sign bits. Then propagate the "sign-byte" into
2708 * the leftmost words and the i64/i32 into the rightmost words using shufb.
2709 *
2710 * @param Op The sext operand
2711 * @param DAG The current DAG
2712 * @return The SDValue with the entire instruction sequence
2713 */
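// Byte-layout sketch for an i64 operand (illustrative): the mask words
// { 0x10101010, 0x10101010, 0x00010203, 0x04050607 } make SHUFB replicate
// the sign byte (byte 0 of sraVal, selected by control byte 0x10) into
// result bytes 0-7 and copy the eight input bytes into bytes 8-15.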
2714 static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
2715 {
2716 DebugLoc dl = Op.getDebugLoc();
2717
2718 // Type to extend to
2719 MVT OpVT = Op.getValueType().getSimpleVT();
2720
2721 // Type to extend from
2722 SDValue Op0 = Op.getOperand(0);
2723 MVT Op0VT = Op0.getValueType().getSimpleVT();
2724
2725 // extend i8 & i16 via i32
2726 if (Op0VT == MVT::i8 || Op0VT == MVT::i16) {
2727 Op0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Op0);
2728 Op0VT = MVT::i32;
2729 }
2730
2731 // The type to extend to needs to be a i128 and
2732 // the type to extend from needs to be i64 or i32.
2733 assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
2734 "LowerSIGN_EXTEND: input and/or output operand have wrong size");
2735 (void)OpVT;
2736
2737 // Create shuffle mask
2738 unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
2739 unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte 8 - 11
2740 unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15
2741 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2742 DAG.getConstant(mask1, MVT::i32),
2743 DAG.getConstant(mask1, MVT::i32),
2744 DAG.getConstant(mask2, MVT::i32),
2745 DAG.getConstant(mask3, MVT::i32));
2746
2747 // Word wise arithmetic right shift to generate at least one byte
2748 // that contains sign bits.
2749 MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32;
2750 SDValue sraVal = DAG.getNode(ISD::SRA,
2751 dl,
2752 mvt,
2753 DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
2754 DAG.getConstant(31, MVT::i32));
2755
2756   // Reinterpret as an i128 (SHUFB requires it). This gets lowered away.
2757 SDValue extended = SDValue(DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
2758 dl, Op0VT, Op0,
2759 DAG.getTargetConstant(
2760 SPU::GPRCRegClass.getID(),
2761 MVT::i32)), 0);
2762 // Shuffle bytes - Copy the sign bits into the upper 64 bits
2763 // and the input value into the lower 64 bits.
2764 SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
2765 extended, sraVal, shufMask);
2766 return DAG.getNode(ISD::BITCAST, dl, MVT::i128, extShuffle);
2767 }
2768
2769 //! Custom (target-specific) lowering entry point
2770 /*!
2771 This is where LLVM's DAG selection process calls to do target-specific
2772 lowering of nodes.
2773 */
2774 SDValue
2775 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
2776 {
2777 unsigned Opc = (unsigned) Op.getOpcode();
2778 EVT VT = Op.getValueType();
2779
2780 switch (Opc) {
2781 default: {
2782 #ifndef NDEBUG
2783 errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2784 errs() << "Op.getOpcode() = " << Opc << "\n";
2785 errs() << "*Op.getNode():\n";
2786 Op.getNode()->dump();
2787 #endif
2788 llvm_unreachable(0);
2789 }
2790 case ISD::LOAD:
2791 case ISD::EXTLOAD:
2792 case ISD::SEXTLOAD:
2793 case ISD::ZEXTLOAD:
2794 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2795 case ISD::STORE:
2796 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2797 case ISD::ConstantPool:
2798 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2799 case ISD::GlobalAddress:
2800 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2801 case ISD::JumpTable:
2802 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2803 case ISD::ConstantFP:
2804 return LowerConstantFP(Op, DAG);
2805
2806 // i8, i64 math ops:
2807 case ISD::ADD:
2808 case ISD::SUB:
2809 case ISD::ROTR:
2810 case ISD::ROTL:
2811 case ISD::SRL:
2812 case ISD::SHL:
2813 case ISD::SRA: {
2814 if (VT == MVT::i8)
2815 return LowerI8Math(Op, DAG, Opc, *this);
2816 break;
2817 }
2818
2819 case ISD::FP_TO_SINT:
2820 case ISD::FP_TO_UINT:
2821 return LowerFP_TO_INT(Op, DAG, *this);
2822
2823 case ISD::SINT_TO_FP:
2824 case ISD::UINT_TO_FP:
2825 return LowerINT_TO_FP(Op, DAG, *this);
2826
2827 // Vector-related lowering.
2828 case ISD::BUILD_VECTOR:
2829 return LowerBUILD_VECTOR(Op, DAG);
2830 case ISD::SCALAR_TO_VECTOR:
2831 return LowerSCALAR_TO_VECTOR(Op, DAG);
2832 case ISD::VECTOR_SHUFFLE:
2833 return LowerVECTOR_SHUFFLE(Op, DAG);
2834 case ISD::EXTRACT_VECTOR_ELT:
2835 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2836 case ISD::INSERT_VECTOR_ELT:
2837 return LowerINSERT_VECTOR_ELT(Op, DAG);
2838
2839 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2840 case ISD::AND:
2841 case ISD::OR:
2842 case ISD::XOR:
2843 return LowerByteImmed(Op, DAG);
2844
2845 // Vector and i8 multiply:
2846 case ISD::MUL:
2847 if (VT == MVT::i8)
2848 return LowerI8Math(Op, DAG, Opc, *this);
2849
2850 case ISD::CTPOP:
2851 return LowerCTPOP(Op, DAG);
2852
2853 case ISD::SELECT_CC:
2854 return LowerSELECT_CC(Op, DAG, *this);
2855
2856 case ISD::SETCC:
2857 return LowerSETCC(Op, DAG, *this);
2858
2859 case ISD::TRUNCATE:
2860 return LowerTRUNCATE(Op, DAG);
2861
2862 case ISD::SIGN_EXTEND:
2863 return LowerSIGN_EXTEND(Op, DAG);
2864 }
2865
2866 return SDValue();
2867 }
2868
2869 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2870 SmallVectorImpl<SDValue>&Results,
2871 SelectionDAG &DAG) const
2872 {
2873 #if 0
2874 unsigned Opc = (unsigned) N->getOpcode();
2875 EVT OpVT = N->getValueType(0);
2876
2877 switch (Opc) {
2878 default: {
2879 errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2880 errs() << "Op.getOpcode() = " << Opc << "\n";
2881 errs() << "*Op.getNode():\n";
2882 N->dump();
2883 abort();
2884 /*NOTREACHED*/
2885 }
2886 }
2887 #endif
2888
2889 /* Otherwise, return unchanged */
2890 }
2891
2892 //===----------------------------------------------------------------------===//
2893 // Target Optimization Hooks
2894 //===----------------------------------------------------------------------===//
2895
2896 SDValue
2897 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2898 {
2899 #if 0
2900 TargetMachine &TM = getTargetMachine();
2901 #endif
2902 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2903 SelectionDAG &DAG = DCI.DAG;
2904 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2905 EVT NodeVT = N->getValueType(0); // The node's value type
2906   EVT Op0VT = Op0.getValueType();     // The first operand's result type
2907 SDValue Result; // Initially, empty result
2908 DebugLoc dl = N->getDebugLoc();
2909
2910 switch (N->getOpcode()) {
2911 default: break;
2912 case ISD::ADD: {
2913 SDValue Op1 = N->getOperand(1);
2914
2915 if (Op0.getOpcode() == SPUISD::IndirectAddr
2916 || Op1.getOpcode() == SPUISD::IndirectAddr) {
2917 // Normalize the operands to reduce repeated code
2918 SDValue IndirectArg = Op0, AddArg = Op1;
2919
2920 if (Op1.getOpcode() == SPUISD::IndirectAddr) {
2921 IndirectArg = Op1;
2922 AddArg = Op0;
2923 }
2924
2925 if (isa<ConstantSDNode>(AddArg)) {
2926 ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
2927 SDValue IndOp1 = IndirectArg.getOperand(1);
2928
2929 if (CN0->isNullValue()) {
2930 // (add (SPUindirect <arg>, <arg>), 0) ->
2931 // (SPUindirect <arg>, <arg>)
2932
2933 #if !defined(NDEBUG)
2934 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2935 errs() << "\n"
2936 << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
2937 << "With: (SPUindirect <arg>, <arg>)\n";
2938 }
2939 #endif
2940
2941 return IndirectArg;
2942 } else if (isa<ConstantSDNode>(IndOp1)) {
2943 // (add (SPUindirect <arg>, <const>), <const>) ->
2944 // (SPUindirect <arg>, <const + const>)
2945 ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
2946 int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
2947 SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
2948
2949 #if !defined(NDEBUG)
2950 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2951 errs() << "\n"
2952 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
2953 << "), " << CN0->getSExtValue() << ")\n"
2954 << "With: (SPUindirect <arg>, "
2955 << combinedConst << ")\n";
2956 }
2957 #endif
2958
2959 return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2960 IndirectArg, combinedValue);
2961 }
2962 }
2963 }
2964 break;
2965 }
2966 case ISD::SIGN_EXTEND:
2967 case ISD::ZERO_EXTEND:
2968 case ISD::ANY_EXTEND: {
2969 if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
2970 // (any_extend (SPUextract_elt0 <arg>)) ->
2971 // (SPUextract_elt0 <arg>)
2972 // Types must match, however...
2973 #if !defined(NDEBUG)
2974 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2975 errs() << "\nReplace: ";
2976 N->dump(&DAG);
2977 errs() << "\nWith: ";
2978 Op0.getNode()->dump(&DAG);
2979 errs() << "\n";
2980 }
2981 #endif
2982
2983 return Op0;
2984 }
2985 break;
2986 }
  case SPUISD::IndirectAddr: {
    if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
      ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
      if (CN != 0 && CN->isNullValue()) {
        // (SPUindirect (SPUaform <addr>, 0), 0) ->
        // (SPUaform <addr>, 0)

        DEBUG(errs() << "Replace: ");
        DEBUG(N->dump(&DAG));
        DEBUG(errs() << "\nWith:    ");
        DEBUG(Op0.getNode()->dump(&DAG));
        DEBUG(errs() << "\n");

        return Op0;
      }
    } else if (Op0.getOpcode() == ISD::ADD) {
      SDValue Op1 = N->getOperand(1);
      if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
        // (SPUindirect (add <arg>, <arg>), 0) ->
        // (SPUindirect <arg>, <arg>)
        if (CN1->isNullValue()) {

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            errs() << "\n"
                   << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
                   << "With:    (SPUindirect <arg>, <arg>)\n";
          }
#endif

          return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
                             Op0.getOperand(0), Op0.getOperand(1));
        }
      }
    }
    break;
  }
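  // Vector shifts and rotates by a constant zero amount are no-ops and are
  // replaced by their first operand.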
  case SPUISD::SHL_BITS:
  case SPUISD::SHL_BYTES:
  case SPUISD::ROTBYTES_LEFT: {
    SDValue Op1 = N->getOperand(1);

    // Kill degenerate vector shifts:
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
      if (CN->isNullValue()) {
        Result = Op0;
      }
    }
    break;
  }
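  // Undo a round trip through the preferred slot: a vector rebuilt from a
  // scalar that was itself read out of a vector (possibly through a
  // type-preserving extend) can simply reuse the original vector.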
  case SPUISD::PREFSLOT2VEC: {
    switch (Op0.getOpcode()) {
    default:
      break;
    case ISD::ANY_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::SIGN_EXTEND: {
      // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
      // <arg>
      // but only if the SPUprefslot2vec and <arg> types match.
      SDValue Op00 = Op0.getOperand(0);
      if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
        SDValue Op000 = Op00.getOperand(0);
        if (Op000.getValueType() == NodeVT) {
          Result = Op000;
        }
      }
      break;
    }
    case SPUISD::VEC2PREFSLOT: {
      // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
      // <arg>
      Result = Op0.getOperand(0);
      break;
    }
    }
    break;
  }
  }

  // Otherwise, return unchanged.
#ifndef NDEBUG
  if (Result.getNode()) {
    DEBUG(errs() << "\nReplace.SPU: ");
    DEBUG(N->dump(&DAG));
    DEBUG(errs() << "\nWith:        ");
    DEBUG(Result.getNode()->dump(&DAG));
    DEBUG(errs() << "\n");
  }
#endif

  return Result;
}

//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
SPUTargetLowering::ConstraintType
SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
  if (ConstraintLetter.size() == 1) {
    switch (ConstraintLetter[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'v':
    case 'y':
      return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(ConstraintLetter);
}

/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
SPUTargetLowering::getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (CallOperandVal == NULL)
    return CW_Default;
  // Look at the constraint type.
  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;
    // FIXME: The supported constraint letters appear to have been copied
    // from the PPC backend and do not all correspond to the GCC docs for
    // SPU. Leaving them as-is until the corresponding lowering support is
    // added.
  case 'b':
  case 'r':
  case 'f':
  case 'd':
  case 'v':
  case 'y':
    weight = CW_Register;
    break;
  }
  return weight;
}

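// Map a single-letter register constraint to a concrete SPU register class,
// keyed on the operand's value type: 'b'/'r' select R64C for i64 and R32C
// otherwise, 'f' selects the matching FP class, and 'v' the vector class.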
std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                EVT VT) const
{
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b':   // R1-R31
    case 'r':   // R0-R31
      if (VT == MVT::i64)
        return std::make_pair(0U, SPU::R64CRegisterClass);
      return std::make_pair(0U, SPU::R32CRegisterClass);
    case 'f':
      if (VT == MVT::f32)
        return std::make_pair(0U, SPU::R32FPRegisterClass);
      else if (VT == MVT::f64)
        return std::make_pair(0U, SPU::R64FPRegisterClass);
      break;
    case 'v':
      return std::make_pair(0U, SPU::GPRCRegisterClass);
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}

//! Compute used/known bits for a SPU operand
void
SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                  const APInt &Mask,
                                                  APInt &KnownZero,
                                                  APInt &KnownOne,
                                                  const SelectionDAG &DAG,
                                                  unsigned Depth) const {
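  // Currently no SPU-specific known-bits information is reported, so the
  // combiner conservatively treats every bit of a custom node as unknown.
  // The disabled block below sketches the nodes a future implementation
  // would need to handle.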
#if 0
  const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;

  switch (Op.getOpcode()) {
  default:
    // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
    break;
  case SPUISD::CALL:
  case SPUISD::SHUFB:
  case SPUISD::SHUFFLE_MASK:
  case SPUISD::CNTB:
  case SPUISD::PREFSLOT2VEC:
  case SPUISD::LDRESULT:
  case SPUISD::VEC2PREFSLOT:
  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::VEC_ROTL:
  case SPUISD::VEC_ROTR:
  case SPUISD::ROTBYTES_LEFT:
  case SPUISD::SELECT_MASK:
  case SPUISD::SELB:
    break;  // no known-bits facts recorded for these nodes yet
  }
#endif
}

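// ComputeNumSignBitsForTargetNode - SPU comparison instructions produce an
// all-ones or all-zeros mask, so every bit of a SETCC result is a copy of
// the sign bit; any type other than i8/i16/i32 is assumed to behave as i32.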
unsigned
SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
                                                   unsigned Depth) const {
  switch (Op.getOpcode()) {
  default:
    return 1;

  case ISD::SETCC: {
    EVT VT = Op.getValueType();

    if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
      VT = MVT::i32;
    }
    return VT.getSizeInBits();
  }
  }
}

// LowerAsmOperandForConstraint
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                std::string &Constraint,
                                                std::vector<SDValue> &Ops,
                                                SelectionDAG &DAG) const {
  // Default, for the time being, to the base class handler
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode.
bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
                                                Type *Ty) const {
  // SPU's local store is 256K, so addresses fit in 18 bits:
  return (V > -(1 << 18) && V < (1 << 18) - 1);
}

bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
  return false;
}

bool
SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The SPU target isn't yet aware of offsets.
  return false;
}

// Can we compare against Imm without first writing it into a register?
bool SPUTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  // ceqi, cgti, etc. all take a signed 10-bit immediate operand.
  return isInt<10>(Imm);
}

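// isLegalAddressingMode - SPU supports exactly three address forms: A-form
// (an 18-bit absolute address), D-form (base register plus a signed 14-bit
// byte offset), and X-form (base register plus index register).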
bool
SPUTargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                         Type *) const {

  // A-form: 18-bit absolute address.
  if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs == 0)
    return true;

  // D-form: reg + 14-bit offset
  if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 0 && isInt<14>(AM.BaseOffs))
    return true;

  // X-form: reg+reg
  if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 1 && AM.BaseOffs == 0)
    return true;

  return false;
}