1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU ------*- C++ -*-====//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //==-----------------------------------------------------------------------===//
9 //
10 /// \file
/// \brief AMDGPU-specific subclass of TargetSubtargetInfo.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
16 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
17
18 #include "AMDGPU.h"
19 #include "R600InstrInfo.h"
20 #include "R600ISelLowering.h"
21 #include "R600FrameLowering.h"
22 #include "SIInstrInfo.h"
23 #include "SIISelLowering.h"
24 #include "SIFrameLowering.h"
25 #include "Utils/AMDGPUBaseInfo.h"
26 #include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
27 #include "llvm/Target/TargetSubtargetInfo.h"
28
29 #define GET_SUBTARGETINFO_HEADER
30 #include "AMDGPUGenSubtargetInfo.inc"
31
32 namespace llvm {
33
34 class SIMachineFunctionInfo;
35 class StringRef;
36
37 class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
38 public:
39 enum Generation {
40 R600 = 0,
41 R700,
42 EVERGREEN,
43 NORTHERN_ISLANDS,
44 SOUTHERN_ISLANDS,
45 SEA_ISLANDS,
46 VOLCANIC_ISLANDS,
47 };
48
49 enum {
50 ISAVersion0_0_0,
51 ISAVersion7_0_0,
52 ISAVersion7_0_1,
53 ISAVersion8_0_0,
54 ISAVersion8_0_1,
55 ISAVersion8_0_3
56 };
57
58 protected:
59 // Basic subtarget description.
60 Triple TargetTriple;
61 Generation Gen;
62 unsigned IsaVersion;
63 unsigned WavefrontSize;
64 int LocalMemorySize;
65 int LDSBankCount;
66 unsigned MaxPrivateElementSize;
67
68 // Possibly statically set by tablegen, but may want to be overridden.
69 bool FastFMAF32;
70 bool HalfRate64Ops;
71
72 // Dynamially set bits that enable features.
73 bool FP32Denormals;
74 bool FP64Denormals;
75 bool FPExceptions;
76 bool FlatForGlobal;
77 bool UnalignedBufferAccess;
78 bool EnableXNACK;
79 bool DebuggerInsertNops;
80 bool DebuggerReserveRegs;
81 bool DebuggerEmitPrologue;
82
83 // Used as options.
84 bool EnableVGPRSpilling;
85 bool EnablePromoteAlloca;
86 bool EnableLoadStoreOpt;
87 bool EnableUnsafeDSOffsetFolding;
88 bool EnableSIScheduler;
89 bool DumpCode;
90
91 // Subtarget statically properties set by tablegen
92 bool FP64;
93 bool IsGCN;
94 bool GCN1Encoding;
95 bool GCN3Encoding;
96 bool CIInsts;
97 bool SGPRInitBug;
98 bool HasSMemRealTime;
99 bool Has16BitInsts;
100 bool FlatAddressSpace;
101 bool R600ALUInst;
102 bool CaymanISA;
103 bool CFALUBug;
104 bool HasVertexCache;
105 short TexVTXClauseSize;
106
107 // Dummy feature to use for assembler in tablegen.
108 bool FeatureDisable;
109
110 InstrItineraryData InstrItins;
111
112 public:
113 AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
114 const TargetMachine &TM);
115 virtual ~AMDGPUSubtarget();
116 AMDGPUSubtarget &initializeSubtargetDependencies(const Triple &TT,
117 StringRef GPU, StringRef FS);
118
119 const AMDGPUInstrInfo *getInstrInfo() const override;
120 const AMDGPUFrameLowering *getFrameLowering() const override;
121 const AMDGPUTargetLowering *getTargetLowering() const override;
122 const AMDGPURegisterInfo *getRegisterInfo() const override;
123
getInstrItineraryData()124 const InstrItineraryData *getInstrItineraryData() const override {
125 return &InstrItins;
126 }
127
128 void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
129
isAmdHsaOS()130 bool isAmdHsaOS() const {
131 return TargetTriple.getOS() == Triple::AMDHSA;
132 }
133
getGeneration()134 Generation getGeneration() const {
135 return Gen;
136 }
137
getWavefrontSize()138 unsigned getWavefrontSize() const {
139 return WavefrontSize;
140 }
141
getLocalMemorySize()142 int getLocalMemorySize() const {
143 return LocalMemorySize;
144 }
145
getLDSBankCount()146 int getLDSBankCount() const {
147 return LDSBankCount;
148 }
149
getMaxPrivateElementSize()150 unsigned getMaxPrivateElementSize() const {
151 return MaxPrivateElementSize;
152 }
153
hasHWFP64()154 bool hasHWFP64() const {
155 return FP64;
156 }
157
hasFastFMAF32()158 bool hasFastFMAF32() const {
159 return FastFMAF32;
160 }
161
hasHalfRate64Ops()162 bool hasHalfRate64Ops() const {
163 return HalfRate64Ops;
164 }
165
hasAddr64()166 bool hasAddr64() const {
167 return (getGeneration() < VOLCANIC_ISLANDS);
168 }
169
hasBFE()170 bool hasBFE() const {
171 return (getGeneration() >= EVERGREEN);
172 }
173
hasBFI()174 bool hasBFI() const {
175 return (getGeneration() >= EVERGREEN);
176 }
177
hasBFM()178 bool hasBFM() const {
179 return hasBFE();
180 }
181
hasBCNT(unsigned Size)182 bool hasBCNT(unsigned Size) const {
183 if (Size == 32)
184 return (getGeneration() >= EVERGREEN);
185
186 if (Size == 64)
187 return (getGeneration() >= SOUTHERN_ISLANDS);
188
189 return false;
190 }
191
hasMulU24()192 bool hasMulU24() const {
193 return (getGeneration() >= EVERGREEN);
194 }
195
hasMulI24()196 bool hasMulI24() const {
197 return (getGeneration() >= SOUTHERN_ISLANDS ||
198 hasCaymanISA());
199 }
200
hasFFBL()201 bool hasFFBL() const {
202 return (getGeneration() >= EVERGREEN);
203 }
204
hasFFBH()205 bool hasFFBH() const {
206 return (getGeneration() >= EVERGREEN);
207 }
208
hasCARRY()209 bool hasCARRY() const {
210 return (getGeneration() >= EVERGREEN);
211 }
212
hasBORROW()213 bool hasBORROW() const {
214 return (getGeneration() >= EVERGREEN);
215 }
216
hasCaymanISA()217 bool hasCaymanISA() const {
218 return CaymanISA;
219 }
220
isPromoteAllocaEnabled()221 bool isPromoteAllocaEnabled() const {
222 return EnablePromoteAlloca;
223 }
224
unsafeDSOffsetFoldingEnabled()225 bool unsafeDSOffsetFoldingEnabled() const {
226 return EnableUnsafeDSOffsetFolding;
227 }
228
dumpCode()229 bool dumpCode() const {
230 return DumpCode;
231 }
232
233 /// Return the amount of LDS that can be used that will not restrict the
234 /// occupancy lower than WaveCount.
235 unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount) const;
236
237 /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if
238 /// the given LDS memory size is the only constraint.
239 unsigned getOccupancyWithLocalMemSize(uint32_t Bytes) const;
240
241
hasFP32Denormals()242 bool hasFP32Denormals() const {
243 return FP32Denormals;
244 }
245
hasFP64Denormals()246 bool hasFP64Denormals() const {
247 return FP64Denormals;
248 }
249
hasFPExceptions()250 bool hasFPExceptions() const {
251 return FPExceptions;
252 }
253
useFlatForGlobal()254 bool useFlatForGlobal() const {
255 return FlatForGlobal;
256 }
257
hasUnalignedBufferAccess()258 bool hasUnalignedBufferAccess() const {
259 return UnalignedBufferAccess;
260 }
261
isXNACKEnabled()262 bool isXNACKEnabled() const {
263 return EnableXNACK;
264 }
265
getMaxWavesPerCU()266 unsigned getMaxWavesPerCU() const {
267 if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
268 return 10;
269
270 // FIXME: Not sure what this is for other subtagets.
271 return 8;
272 }
273
274 /// \brief Returns the offset in bytes from the start of the input buffer
275 /// of the first explicit kernel argument.
getExplicitKernelArgOffset()276 unsigned getExplicitKernelArgOffset() const {
277 return isAmdHsaOS() ? 0 : 36;
278 }
279
getStackAlignment()280 unsigned getStackAlignment() const {
281 // Scratch is allocated in 256 dword per wave blocks.
282 return 4 * 256 / getWavefrontSize();
283 }
284
enableMachineScheduler()285 bool enableMachineScheduler() const override {
286 return true;
287 }
288
enableSubRegLiveness()289 bool enableSubRegLiveness() const override {
290 return true;
291 }
292 };
293
294 class R600Subtarget final : public AMDGPUSubtarget {
295 private:
296 R600InstrInfo InstrInfo;
297 R600FrameLowering FrameLowering;
298 R600TargetLowering TLInfo;
299
300 public:
301 R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
302 const TargetMachine &TM);
303
getInstrInfo()304 const R600InstrInfo *getInstrInfo() const override {
305 return &InstrInfo;
306 }
307
getFrameLowering()308 const R600FrameLowering *getFrameLowering() const override {
309 return &FrameLowering;
310 }
311
getTargetLowering()312 const R600TargetLowering *getTargetLowering() const override {
313 return &TLInfo;
314 }
315
getRegisterInfo()316 const R600RegisterInfo *getRegisterInfo() const override {
317 return &InstrInfo.getRegisterInfo();
318 }
319
hasCFAluBug()320 bool hasCFAluBug() const {
321 return CFALUBug;
322 }
323
hasVertexCache()324 bool hasVertexCache() const {
325 return HasVertexCache;
326 }
327
getTexVTXClauseSize()328 short getTexVTXClauseSize() const {
329 return TexVTXClauseSize;
330 }
331
332 unsigned getStackEntrySize() const;
333 };
334
335 class SISubtarget final : public AMDGPUSubtarget {
336 public:
337 enum {
338 FIXED_SGPR_COUNT_FOR_INIT_BUG = 80
339 };
340
341 private:
342 SIInstrInfo InstrInfo;
343 SIFrameLowering FrameLowering;
344 SITargetLowering TLInfo;
345 std::unique_ptr<GISelAccessor> GISel;
346
347 public:
348 SISubtarget(const Triple &TT, StringRef CPU, StringRef FS,
349 const TargetMachine &TM);
350
getInstrInfo()351 const SIInstrInfo *getInstrInfo() const override {
352 return &InstrInfo;
353 }
354
getFrameLowering()355 const SIFrameLowering *getFrameLowering() const override {
356 return &FrameLowering;
357 }
358
getTargetLowering()359 const SITargetLowering *getTargetLowering() const override {
360 return &TLInfo;
361 }
362
getCallLowering()363 const CallLowering *getCallLowering() const override {
364 assert(GISel && "Access to GlobalISel APIs not set");
365 return GISel->getCallLowering();
366 }
367
getRegisterInfo()368 const SIRegisterInfo *getRegisterInfo() const override {
369 return &InstrInfo.getRegisterInfo();
370 }
371
setGISelAccessor(GISelAccessor & GISel)372 void setGISelAccessor(GISelAccessor &GISel) {
373 this->GISel.reset(&GISel);
374 }
375
376 void overrideSchedPolicy(MachineSchedPolicy &Policy,
377 unsigned NumRegionInstrs) const override;
378
379 bool isVGPRSpillingEnabled(const Function& F) const;
380
381 unsigned getAmdKernelCodeChipID() const;
382
383 AMDGPU::IsaVersion getIsaVersion() const;
384
getMaxNumUserSGPRs()385 unsigned getMaxNumUserSGPRs() const {
386 return 16;
387 }
388
hasFlatAddressSpace()389 bool hasFlatAddressSpace() const {
390 return FlatAddressSpace;
391 }
392
hasSMemRealTime()393 bool hasSMemRealTime() const {
394 return HasSMemRealTime;
395 }
396
has16BitInsts()397 bool has16BitInsts() const {
398 return Has16BitInsts;
399 }
400
enableSIScheduler()401 bool enableSIScheduler() const {
402 return EnableSIScheduler;
403 }
404
debuggerSupported()405 bool debuggerSupported() const {
406 return debuggerInsertNops() && debuggerReserveRegs() &&
407 debuggerEmitPrologue();
408 }
409
debuggerInsertNops()410 bool debuggerInsertNops() const {
411 return DebuggerInsertNops;
412 }
413
debuggerReserveRegs()414 bool debuggerReserveRegs() const {
415 return DebuggerReserveRegs;
416 }
417
debuggerEmitPrologue()418 bool debuggerEmitPrologue() const {
419 return DebuggerEmitPrologue;
420 }
421
loadStoreOptEnabled()422 bool loadStoreOptEnabled() const {
423 return EnableLoadStoreOpt;
424 }
425
hasSGPRInitBug()426 bool hasSGPRInitBug() const {
427 return SGPRInitBug;
428 }
429 };
430
431
getInstrInfo()432 inline const AMDGPUInstrInfo *AMDGPUSubtarget::getInstrInfo() const {
433 if (getGeneration() >= SOUTHERN_ISLANDS)
434 return static_cast<const SISubtarget *>(this)->getInstrInfo();
435
436 return static_cast<const R600Subtarget *>(this)->getInstrInfo();
437 }
438
getFrameLowering()439 inline const AMDGPUFrameLowering *AMDGPUSubtarget::getFrameLowering() const {
440 if (getGeneration() >= SOUTHERN_ISLANDS)
441 return static_cast<const SISubtarget *>(this)->getFrameLowering();
442
443 return static_cast<const R600Subtarget *>(this)->getFrameLowering();
444 }
445
getTargetLowering()446 inline const AMDGPUTargetLowering *AMDGPUSubtarget::getTargetLowering() const {
447 if (getGeneration() >= SOUTHERN_ISLANDS)
448 return static_cast<const SISubtarget *>(this)->getTargetLowering();
449
450 return static_cast<const R600Subtarget *>(this)->getTargetLowering();
451 }
452
getRegisterInfo()453 inline const AMDGPURegisterInfo *AMDGPUSubtarget::getRegisterInfo() const {
454 if (getGeneration() >= SOUTHERN_ISLANDS)
455 return static_cast<const SISubtarget *>(this)->getRegisterInfo();
456
457 return static_cast<const R600Subtarget *>(this)->getRegisterInfo();
458 }
459
460 } // End namespace llvm
461
462 #endif
463