• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief Implementation of the TargetInstrInfo class that is common to all
12 /// AMD GPUs.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "AMDGPUInstrInfo.h"
17 #include "AMDGPURegisterInfo.h"
18 #include "AMDGPUTargetMachine.h"
19 #include "llvm/CodeGen/MachineFrameInfo.h"
20 #include "llvm/CodeGen/MachineInstrBuilder.h"
21 #include "llvm/CodeGen/MachineRegisterInfo.h"
22 
23 using namespace llvm;
24 
25 #define GET_INSTRINFO_CTOR_DTOR
26 #define GET_INSTRINFO_NAMED_OPS
27 #define GET_INSTRMAP_INFO
28 #include "AMDGPUGenInstrInfo.inc"
29 
30 // Pin the vtable to this file.
anchor()31 void AMDGPUInstrInfo::anchor() {}
32 
AMDGPUInstrInfo(const AMDGPUSubtarget & ST)33 AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST)
34   : AMDGPUGenInstrInfo(-1, -1), ST(ST) {}
35 
enableClusterLoads() const36 bool AMDGPUInstrInfo::enableClusterLoads() const {
37   return true;
38 }
39 
40 // FIXME: This behaves strangely. If, for example, you have 32 load + stores,
41 // the first 16 loads will be interleaved with the stores, and the next 16 will
42 // be clustered as expected. It should really split into 2 16 store batches.
43 //
44 // Loads are clustered until this returns false, rather than trying to schedule
45 // groups of stores. This also means we have to deal with saying different
46 // address space loads should be clustered, and ones which might cause bank
47 // conflicts.
48 //
49 // This might be deprecated so it might not be worth that much effort to fix.
shouldScheduleLoadsNear(SDNode * Load0,SDNode * Load1,int64_t Offset0,int64_t Offset1,unsigned NumLoads) const50 bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
51                                               int64_t Offset0, int64_t Offset1,
52                                               unsigned NumLoads) const {
53   assert(Offset1 > Offset0 &&
54          "Second offset should be larger than first offset!");
55   // If we have less than 16 loads in a row, and the offsets are within 64
56   // bytes, then schedule together.
57 
58   // A cacheline is 64 bytes (for global memory).
59   return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
60 }
61 
getMaskedMIMGOp(uint16_t Opcode,unsigned Channels) const62 int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const {
63   switch (Channels) {
64   default: return Opcode;
65   case 1: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_1);
66   case 2: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_2);
67   case 3: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_3);
68   }
69 }
70 
// Encoding family selector. The enumerator values must be kept in sync with
// the SIEncodingFamily class in SIInstrInfo.td.
enum SIEncodingFamily { SI = 0, VI = 1 };
76 
77 // Wrapper for Tablegen'd function.  enum Subtarget is not defined in any
78 // header files, so we need to wrap it in a function that takes unsigned
79 // instead.
80 namespace llvm {
81 namespace AMDGPU {
getMCOpcode(uint16_t Opcode,unsigned Gen)82 static int getMCOpcode(uint16_t Opcode, unsigned Gen) {
83   return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
84 }
85 }
86 }
87 
subtargetEncodingFamily(const AMDGPUSubtarget & ST)88 static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) {
89   switch (ST.getGeneration()) {
90   case AMDGPUSubtarget::SOUTHERN_ISLANDS:
91   case AMDGPUSubtarget::SEA_ISLANDS:
92     return SIEncodingFamily::SI;
93   case AMDGPUSubtarget::VOLCANIC_ISLANDS:
94     return SIEncodingFamily::VI;
95 
96   // FIXME: This should never be called for r600 GPUs.
97   case AMDGPUSubtarget::R600:
98   case AMDGPUSubtarget::R700:
99   case AMDGPUSubtarget::EVERGREEN:
100   case AMDGPUSubtarget::NORTHERN_ISLANDS:
101     return SIEncodingFamily::SI;
102   }
103 
104   llvm_unreachable("Unknown subtarget generation!");
105 }
106 
pseudoToMCOpcode(int Opcode) const107 int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
108   int MCOp = AMDGPU::getMCOpcode(Opcode, subtargetEncodingFamily(ST));
109 
110   // -1 means that Opcode is already a native instruction.
111   if (MCOp == -1)
112     return Opcode;
113 
114   // (uint16_t)-1 means that Opcode is a pseudo instruction that has
115   // no encoding in the given subtarget generation.
116   if (MCOp == (uint16_t)-1)
117     return -1;
118 
119   return MCOp;
120 }
121