//===-- Target.cpp ----------------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "../Target.h"

#include "../Latency.h"
#include "../Uops.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "X86.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/MC/MCInstBuilder.h"

namespace exegesis {

namespace {

// Common code for X86 Uops and Latency runners.
template <typename Impl> class X86BenchmarkRunner : public Impl {
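  // Reuse the constructors of the underlying Latency/Uops implementation.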
  using Impl::Impl;

  llvm::Expected<SnippetPrototype>
  generatePrototype(unsigned Opcode) const override {
    // Test whether we can generate a snippet for this instruction.
    const auto &InstrInfo = this->State.getInstrInfo();
    const auto OpcodeName = InstrInfo.getName(Opcode);
    if (OpcodeName.startswith("POPF") || OpcodeName.startswith("PUSHF") ||
        OpcodeName.startswith("ADJCALLSTACK")) {
      return llvm::make_error<BenchmarkFailure>(
          "Unsupported opcode: Push/Pop/AdjCallStack");
    }

    // Handle X87.
    const auto &InstrDesc = InstrInfo.get(Opcode);
    const unsigned FPInstClass = InstrDesc.TSFlags & llvm::X86II::FPTypeMask;
    const Instruction Instr(InstrDesc, this->RATC);
    switch (FPInstClass) {
    case llvm::X86II::NotFP:
      break;
    case llvm::X86II::ZeroArgFP:
      return llvm::make_error<BenchmarkFailure>("Unsupported x87 ZeroArgFP");
    case llvm::X86II::OneArgFP:
      return llvm::make_error<BenchmarkFailure>("Unsupported x87 OneArgFP");
    case llvm::X86II::OneArgFPRW:
    case llvm::X86II::TwoArgFP: {
      // These are instructions like
      //  - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
      //  - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
      // They are intrinsically serial and do not modify the state of the stack.
      // We generate the same code for latency and uops.
      return this->generateSelfAliasingPrototype(Instr);
    }
    case llvm::X86II::CompareFP:
      return Impl::handleCompareFP(Instr);
    case llvm::X86II::CondMovFP:
      return Impl::handleCondMovFP(Instr);
    case llvm::X86II::SpecialFP:
      return llvm::make_error<BenchmarkFailure>("Unsupported x87 SpecialFP");
    default:
      llvm_unreachable("Unknown FP Type!");
    }

    // Fall back to the generic implementation.
    return Impl::Base::generatePrototype(Opcode);
  }
};

class X86LatencyImpl : public LatencyBenchmarkRunner {
protected:
  using Base = LatencyBenchmarkRunner;
  using Base::Base;
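  // x87 compare and conditional-move instructions are not supported when
  // measuring latency.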
  llvm::Expected<SnippetPrototype>
  handleCompareFP(const Instruction &Instr) const {
    return llvm::make_error<BenchmarkFailure>("Unsupported x87 CompareFP");
  }
  llvm::Expected<SnippetPrototype>
  handleCondMovFP(const Instruction &Instr) const {
    return llvm::make_error<BenchmarkFailure>("Unsupported x87 CondMovFP");
  }
};

class X86UopsImpl : public UopsBenchmarkRunner {
protected:
  using Base = UopsBenchmarkRunner;
  using Base::Base;
  // We can compute uops for any FP instruction that does not grow or shrink
  // the stack, i.e. one that either does not touch the stack or pushes as
  // much as it pops.
  llvm::Expected<SnippetPrototype>
  handleCompareFP(const Instruction &Instr) const {
    return generateUnconstrainedPrototype(
        Instr, "instruction does not grow/shrink the FP stack");
  }
  llvm::Expected<SnippetPrototype>
  handleCondMovFP(const Instruction &Instr) const {
    return generateUnconstrainedPrototype(
        Instr, "instruction does not grow/shrink the FP stack");
  }
};

class ExegesisX86Target : public ExegesisTarget {
  void addTargetSpecificPasses(llvm::PassManagerBase &PM) const override {
    // Lowers FP pseudo-instructions, e.g. ABS_Fp32 -> ABS_F.
    PM.add(llvm::createX86FloatingPointStackifierPass());
  }

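  // Returns code that sets `Reg` to a constant value, or an empty vector if
  // the register is not handled.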
  std::vector<llvm::MCInst> setRegToConstant(const llvm::MCSubtargetInfo &STI,
                                             unsigned Reg) const override {
    // GPR.
    if (llvm::X86::GR8RegClass.contains(Reg))
      return {llvm::MCInstBuilder(llvm::X86::MOV8ri).addReg(Reg).addImm(1)};
    if (llvm::X86::GR16RegClass.contains(Reg))
      return {llvm::MCInstBuilder(llvm::X86::MOV16ri).addReg(Reg).addImm(1)};
    if (llvm::X86::GR32RegClass.contains(Reg))
      return {llvm::MCInstBuilder(llvm::X86::MOV32ri).addReg(Reg).addImm(1)};
    if (llvm::X86::GR64RegClass.contains(Reg))
      return {llvm::MCInstBuilder(llvm::X86::MOV64ri32).addReg(Reg).addImm(1)};
    // MMX.
    if (llvm::X86::VR64RegClass.contains(Reg))
      return setVectorRegToConstant(Reg, 8, llvm::X86::MMX_MOVQ64rm);
    // {X,Y,Z}MM.
    if (llvm::X86::VR128XRegClass.contains(Reg)) {
      if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
        return setVectorRegToConstant(Reg, 16, llvm::X86::VMOVDQU32Z128rm);
      if (STI.getFeatureBits()[llvm::X86::FeatureAVX])
        return setVectorRegToConstant(Reg, 16, llvm::X86::VMOVDQUrm);
      return setVectorRegToConstant(Reg, 16, llvm::X86::MOVDQUrm);
    }
    if (llvm::X86::VR256XRegClass.contains(Reg)) {
      if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
        return setVectorRegToConstant(Reg, 32, llvm::X86::VMOVDQU32Z256rm);
      return setVectorRegToConstant(Reg, 32, llvm::X86::VMOVDQUYrm);
    }
    if (llvm::X86::VR512RegClass.contains(Reg))
      return setVectorRegToConstant(Reg, 64, llvm::X86::VMOVDQU32Zrm);
    // X87.
    if (llvm::X86::RFP32RegClass.contains(Reg) ||
        llvm::X86::RFP64RegClass.contains(Reg) ||
        llvm::X86::RFP80RegClass.contains(Reg))
      return setVectorRegToConstant(Reg, 8, llvm::X86::LD_Fp64m);
    if (Reg == llvm::X86::EFLAGS) {
      // Set all flags to 0 except the bits that are "reserved and set to 1".
      constexpr const uint32_t kImmValue = 0x00007002u;
      std::vector<llvm::MCInst> Result;
      Result.push_back(allocateStackSpace(8));
      Result.push_back(fillStackSpace(llvm::X86::MOV64mi32, 0, kImmValue));
      Result.push_back(
          llvm::MCInstBuilder(llvm::X86::POPF64)); // Also releases the stack.
      return Result;
    }
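    // Unhandled register: emit no instructions.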
    return {};
  }

  std::unique_ptr<BenchmarkRunner>
  createLatencyBenchmarkRunner(const LLVMState &State) const override {
    return llvm::make_unique<X86BenchmarkRunner<X86LatencyImpl>>(State);
  }

  std::unique_ptr<BenchmarkRunner>
  createUopsBenchmarkRunner(const LLVMState &State) const override {
    return llvm::make_unique<X86BenchmarkRunner<X86UopsImpl>>(State);
  }

  bool matchesArch(llvm::Triple::ArchType Arch) const override {
    return Arch == llvm::Triple::x86_64 || Arch == llvm::Triple::x86;
  }

private:
  // setRegToConstant() specialized for a vector register of size
  // `RegSizeBytes`. `RMOpcode` is the opcode used to do a memory -> vector
  // register load.
  static std::vector<llvm::MCInst>
  setVectorRegToConstant(const unsigned Reg, const unsigned RegSizeBytes,
                         const unsigned RMOpcode) {
    // There is no instruction to directly set an XMM register, so we go
    // through memory. Vector values can be interpreted as integers of various
    // sizes (8 to 64 bits) as well as floats and doubles, so we choose an
    // immediate value that has set bits in every byte and is a normal float/
    // double: 0x40404040 is ~32.5 when interpreted as a double and ~3.0f when
    // interpreted as a float.
    constexpr const uint32_t kImmValue = 0x40404040u;
    std::vector<llvm::MCInst> Result;
    Result.push_back(allocateStackSpace(RegSizeBytes));
    constexpr const unsigned kMov32NumBytes = 4;
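    // Fill the scratch area with the byte pattern, 4 bytes at a time.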
    for (unsigned Disp = 0; Disp < RegSizeBytes; Disp += kMov32NumBytes) {
      Result.push_back(fillStackSpace(llvm::X86::MOV32mi, Disp, kImmValue));
    }
    Result.push_back(loadToReg(Reg, RMOpcode));
    Result.push_back(releaseStackSpace(RegSizeBytes));
    return Result;
  }

  // Allocates scratch memory on the stack.
  static llvm::MCInst allocateStackSpace(unsigned Bytes) {
    return llvm::MCInstBuilder(llvm::X86::SUB64ri8)
        .addReg(llvm::X86::RSP)
        .addReg(llvm::X86::RSP)
        .addImm(Bytes);
  }

  // Fills scratch memory at offset `OffsetBytes` with value `Imm`.
  static llvm::MCInst fillStackSpace(unsigned MovOpcode, unsigned OffsetBytes,
                                     uint64_t Imm) {
    return llvm::MCInstBuilder(MovOpcode)
        // Address = RSP + OffsetBytes.
        .addReg(llvm::X86::RSP) // BaseReg
        .addImm(1)              // ScaleAmt
        .addReg(0)              // IndexReg
        .addImm(OffsetBytes)    // Disp
        .addReg(0)              // Segment
        // Immediate.
        .addImm(Imm);
  }

  // Loads scratch memory into register `Reg` using opcode `RMOpcode`.
  static llvm::MCInst loadToReg(unsigned Reg, unsigned RMOpcode) {
    return llvm::MCInstBuilder(RMOpcode)
        .addReg(Reg)
        // Address = RSP.
        .addReg(llvm::X86::RSP) // BaseReg
        .addImm(1)              // ScaleAmt
        .addReg(0)              // IndexReg
        .addImm(0)              // Disp
        .addReg(0);             // Segment
  }

  // Releases scratch memory.
  static llvm::MCInst releaseStackSpace(unsigned Bytes) {
    return llvm::MCInstBuilder(llvm::X86::ADD64ri8)
        .addReg(llvm::X86::RSP)
        .addReg(llvm::X86::RSP)
        .addImm(Bytes);
  }
};

} // namespace

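// Returns the lazily-constructed singleton X86 exegesis target.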
static ExegesisTarget *getTheExegesisX86Target() {
  static ExegesisX86Target Target;
  return &Target;
}

void InitializeX86ExegesisTarget() {
  ExegesisTarget::registerTarget(getTheExegesisX86Target());
}

} // namespace exegesis