• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===-- Target.cpp ----------------------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 #include "../Target.h"
10 
11 #include "../Latency.h"
12 #include "../Uops.h"
13 #include "MCTargetDesc/X86BaseInfo.h"
14 #include "MCTargetDesc/X86MCTargetDesc.h"
15 #include "X86.h"
16 #include "X86RegisterInfo.h"
17 #include "X86Subtarget.h"
18 #include "llvm/MC/MCInstBuilder.h"
19 
20 namespace exegesis {
21 
22 namespace {
23 
24 // Common code for X86 Uops and Latency runners.
25 template <typename Impl> class X86BenchmarkRunner : public Impl {
26   using Impl::Impl;
27 
28   llvm::Expected<SnippetPrototype>
generatePrototype(unsigned Opcode) const29   generatePrototype(unsigned Opcode) const override {
30     // Test whether we can generate a snippet for this instruction.
31     const auto &InstrInfo = this->State.getInstrInfo();
32     const auto OpcodeName = InstrInfo.getName(Opcode);
33     if (OpcodeName.startswith("POPF") || OpcodeName.startswith("PUSHF") ||
34         OpcodeName.startswith("ADJCALLSTACK")) {
35       return llvm::make_error<BenchmarkFailure>(
36           "Unsupported opcode: Push/Pop/AdjCallStack");
37     }
38 
39     // Handle X87.
40     const auto &InstrDesc = InstrInfo.get(Opcode);
41     const unsigned FPInstClass = InstrDesc.TSFlags & llvm::X86II::FPTypeMask;
42     const Instruction Instr(InstrDesc, this->RATC);
43     switch (FPInstClass) {
44     case llvm::X86II::NotFP:
45       break;
46     case llvm::X86II::ZeroArgFP:
47       return llvm::make_error<BenchmarkFailure>("Unsupported x87 ZeroArgFP");
48     case llvm::X86II::OneArgFP:
49       return llvm::make_error<BenchmarkFailure>("Unsupported x87 OneArgFP");
50     case llvm::X86II::OneArgFPRW:
51     case llvm::X86II::TwoArgFP: {
52       // These are instructions like
53       //   - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
54       //   - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
55       // They are intrinsically serial and do not modify the state of the stack.
56       // We generate the same code for latency and uops.
57       return this->generateSelfAliasingPrototype(Instr);
58     }
59     case llvm::X86II::CompareFP:
60       return Impl::handleCompareFP(Instr);
61     case llvm::X86II::CondMovFP:
62       return Impl::handleCondMovFP(Instr);
63     case llvm::X86II::SpecialFP:
64       return llvm::make_error<BenchmarkFailure>("Unsupported x87 SpecialFP");
65     default:
66       llvm_unreachable("Unknown FP Type!");
67     }
68 
69     // Fallback to generic implementation.
70     return Impl::Base::generatePrototype(Opcode);
71   }
72 };
73 
74 class X86LatencyImpl : public LatencyBenchmarkRunner {
75 protected:
76   using Base = LatencyBenchmarkRunner;
77   using Base::Base;
78   llvm::Expected<SnippetPrototype>
handleCompareFP(const Instruction & Instr) const79   handleCompareFP(const Instruction &Instr) const {
80     return llvm::make_error<BenchmarkFailure>("Unsupported x87 CompareFP");
81   }
82   llvm::Expected<SnippetPrototype>
handleCondMovFP(const Instruction & Instr) const83   handleCondMovFP(const Instruction &Instr) const {
84     return llvm::make_error<BenchmarkFailure>("Unsupported x87 CondMovFP");
85   }
86 };
87 
88 class X86UopsImpl : public UopsBenchmarkRunner {
89 protected:
90   using Base = UopsBenchmarkRunner;
91   using Base::Base;
92   // We can compute uops for any FP instruction that does not grow or shrink the
93   // stack (either do not touch the stack or push as much as they pop).
94   llvm::Expected<SnippetPrototype>
handleCompareFP(const Instruction & Instr) const95   handleCompareFP(const Instruction &Instr) const {
96     return generateUnconstrainedPrototype(
97         Instr, "instruction does not grow/shrink the FP stack");
98   }
99   llvm::Expected<SnippetPrototype>
handleCondMovFP(const Instruction & Instr) const100   handleCondMovFP(const Instruction &Instr) const {
101     return generateUnconstrainedPrototype(
102         Instr, "instruction does not grow/shrink the FP stack");
103   }
104 };
105 
106 class ExegesisX86Target : public ExegesisTarget {
addTargetSpecificPasses(llvm::PassManagerBase & PM) const107   void addTargetSpecificPasses(llvm::PassManagerBase &PM) const override {
108     // Lowers FP pseudo-instructions, e.g. ABS_Fp32 -> ABS_F.
109     PM.add(llvm::createX86FloatingPointStackifierPass());
110   }
111 
setRegToConstant(const llvm::MCSubtargetInfo & STI,unsigned Reg) const112   std::vector<llvm::MCInst> setRegToConstant(const llvm::MCSubtargetInfo &STI,
113                                              unsigned Reg) const override {
114     // GPR.
115     if (llvm::X86::GR8RegClass.contains(Reg))
116       return {llvm::MCInstBuilder(llvm::X86::MOV8ri).addReg(Reg).addImm(1)};
117     if (llvm::X86::GR16RegClass.contains(Reg))
118       return {llvm::MCInstBuilder(llvm::X86::MOV16ri).addReg(Reg).addImm(1)};
119     if (llvm::X86::GR32RegClass.contains(Reg))
120       return {llvm::MCInstBuilder(llvm::X86::MOV32ri).addReg(Reg).addImm(1)};
121     if (llvm::X86::GR64RegClass.contains(Reg))
122       return {llvm::MCInstBuilder(llvm::X86::MOV64ri32).addReg(Reg).addImm(1)};
123     // MMX.
124     if (llvm::X86::VR64RegClass.contains(Reg))
125       return setVectorRegToConstant(Reg, 8, llvm::X86::MMX_MOVQ64rm);
126     // {X,Y,Z}MM.
127     if (llvm::X86::VR128XRegClass.contains(Reg)) {
128       if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
129         return setVectorRegToConstant(Reg, 16, llvm::X86::VMOVDQU32Z128rm);
130       if (STI.getFeatureBits()[llvm::X86::FeatureAVX])
131         return setVectorRegToConstant(Reg, 16, llvm::X86::VMOVDQUrm);
132       return setVectorRegToConstant(Reg, 16, llvm::X86::MOVDQUrm);
133     }
134     if (llvm::X86::VR256XRegClass.contains(Reg)) {
135       if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
136         return setVectorRegToConstant(Reg, 32, llvm::X86::VMOVDQU32Z256rm);
137       return setVectorRegToConstant(Reg, 32, llvm::X86::VMOVDQUYrm);
138     }
139     if (llvm::X86::VR512RegClass.contains(Reg))
140       return setVectorRegToConstant(Reg, 64, llvm::X86::VMOVDQU32Zrm);
141     // X87.
142     if (llvm::X86::RFP32RegClass.contains(Reg) ||
143         llvm::X86::RFP64RegClass.contains(Reg) ||
144         llvm::X86::RFP80RegClass.contains(Reg))
145       return setVectorRegToConstant(Reg, 8, llvm::X86::LD_Fp64m);
146     if (Reg == llvm::X86::EFLAGS) {
147       // Set all flags to 0 but the bits that are "reserved and set to 1".
148       constexpr const uint32_t kImmValue = 0x00007002u;
149       std::vector<llvm::MCInst> Result;
150       Result.push_back(allocateStackSpace(8));
151       Result.push_back(fillStackSpace(llvm::X86::MOV64mi32, 0, kImmValue));
152       Result.push_back(llvm::MCInstBuilder(llvm::X86::POPF64)); // Also pops.
153       return Result;
154     }
155     return {};
156   }
157 
158   std::unique_ptr<BenchmarkRunner>
createLatencyBenchmarkRunner(const LLVMState & State) const159   createLatencyBenchmarkRunner(const LLVMState &State) const override {
160     return llvm::make_unique<X86BenchmarkRunner<X86LatencyImpl>>(State);
161   }
162 
163   std::unique_ptr<BenchmarkRunner>
createUopsBenchmarkRunner(const LLVMState & State) const164   createUopsBenchmarkRunner(const LLVMState &State) const override {
165     return llvm::make_unique<X86BenchmarkRunner<X86UopsImpl>>(State);
166   }
167 
matchesArch(llvm::Triple::ArchType Arch) const168   bool matchesArch(llvm::Triple::ArchType Arch) const override {
169     return Arch == llvm::Triple::x86_64 || Arch == llvm::Triple::x86;
170   }
171 
172 private:
173   // setRegToConstant() specialized for a vector register of size
174   // `RegSizeBytes`. `RMOpcode` is the opcode used to do a memory -> vector
175   // register load.
176   static std::vector<llvm::MCInst>
setVectorRegToConstant(const unsigned Reg,const unsigned RegSizeBytes,const unsigned RMOpcode)177   setVectorRegToConstant(const unsigned Reg, const unsigned RegSizeBytes,
178                          const unsigned RMOpcode) {
179     // There is no instruction to directly set XMM, go through memory.
180     // Since vector values can be interpreted as integers of various sizes (8
181     // to 64 bits) as well as floats and double, so we chose an immediate
182     // value that has set bits for all byte values and is a normal float/
183     // double. 0x40404040 is ~32.5 when interpreted as a double and ~3.0f when
184     // interpreted as a float.
185     constexpr const uint32_t kImmValue = 0x40404040u;
186     std::vector<llvm::MCInst> Result;
187     Result.push_back(allocateStackSpace(RegSizeBytes));
188     constexpr const unsigned kMov32NumBytes = 4;
189     for (unsigned Disp = 0; Disp < RegSizeBytes; Disp += kMov32NumBytes) {
190       Result.push_back(fillStackSpace(llvm::X86::MOV32mi, Disp, kImmValue));
191     }
192     Result.push_back(loadToReg(Reg, RMOpcode));
193     Result.push_back(releaseStackSpace(RegSizeBytes));
194     return Result;
195   }
196 
197   // Allocates scratch memory on the stack.
allocateStackSpace(unsigned Bytes)198   static llvm::MCInst allocateStackSpace(unsigned Bytes) {
199     return llvm::MCInstBuilder(llvm::X86::SUB64ri8)
200         .addReg(llvm::X86::RSP)
201         .addReg(llvm::X86::RSP)
202         .addImm(Bytes);
203   }
204 
205   // Fills scratch memory at offset `OffsetBytes` with value `Imm`.
fillStackSpace(unsigned MovOpcode,unsigned OffsetBytes,uint64_t Imm)206   static llvm::MCInst fillStackSpace(unsigned MovOpcode, unsigned OffsetBytes,
207                                      uint64_t Imm) {
208     return llvm::MCInstBuilder(MovOpcode)
209         // Address = ESP
210         .addReg(llvm::X86::RSP) // BaseReg
211         .addImm(1)              // ScaleAmt
212         .addReg(0)              // IndexReg
213         .addImm(OffsetBytes)    // Disp
214         .addReg(0)              // Segment
215         // Immediate.
216         .addImm(Imm);
217   }
218 
219   // Loads scratch memory into register `Reg` using opcode `RMOpcode`.
loadToReg(unsigned Reg,unsigned RMOpcode)220   static llvm::MCInst loadToReg(unsigned Reg, unsigned RMOpcode) {
221     return llvm::MCInstBuilder(RMOpcode)
222         .addReg(Reg)
223         // Address = ESP
224         .addReg(llvm::X86::RSP) // BaseReg
225         .addImm(1)              // ScaleAmt
226         .addReg(0)              // IndexReg
227         .addImm(0)              // Disp
228         .addReg(0);             // Segment
229   }
230 
231   // Releases scratch memory.
releaseStackSpace(unsigned Bytes)232   static llvm::MCInst releaseStackSpace(unsigned Bytes) {
233     return llvm::MCInstBuilder(llvm::X86::ADD64ri8)
234         .addReg(llvm::X86::RSP)
235         .addReg(llvm::X86::RSP)
236         .addImm(Bytes);
237   }
238 };
239 
240 } // namespace
241 
getTheExegesisX86Target()242 static ExegesisTarget *getTheExegesisX86Target() {
243   static ExegesisX86Target Target;
244   return &Target;
245 }
246 
InitializeX86ExegesisTarget()247 void InitializeX86ExegesisTarget() {
248   ExegesisTarget::registerTarget(getTheExegesisX86Target());
249 }
250 
251 } // namespace exegesis
252