//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the InstrBuilder interface.
///
//===----------------------------------------------------------------------===//

#include "InstrBuilder.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "llvm-mca"

namespace mca {

using namespace llvm;

static void initializeUsedResources(InstrDesc &ID,
                                    const MCSchedClassDesc &SCDesc,
                                    const MCSubtargetInfo &STI,
                                    ArrayRef<uint64_t> ProcResourceMasks) {
  const MCSchedModel &SM = STI.getSchedModel();

  // Populate resources consumed.
  using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
  std::vector<ResourcePlusCycles> Worklist;

  // Track cycles contributed by resources that are in a "Super" relationship.
  // This is required if we want to correctly match the behavior of method
  // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set
  // of "consumed" processor resources and resource cycles, the logic in
  // ExpandProcResource() doesn't update the number of resource cycles
  // contributed by a "Super" resource to a group.
  // We need to take this into account when we find that a processor resource
  // is part of a group, and it is also used as the "Super" of other resources.
  // This map stores the number of cycles contributed by sub-resources that are
  // part of a "Super" resource. The key value is the "Super" resource mask ID.
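  // Illustrative example (hypothetical resources): suppose unit A is the
  // "Super" of unit B, and a write consumes 1cy of B. That cycle is recorded
  // here under A's mask. Later, when the cycles consumed by A are subtracted
  // from a group containing A, the 1cy already charged to sub-resource B is
  // excluded from the subtraction, matching ExpandProcResource().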
  DenseMap<uint64_t, unsigned> SuperResources;

  for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) {
    const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I;
    const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx);
    uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx];
    if (PR.BufferSize != -1)
      ID.Buffers.push_back(Mask);
    CycleSegment RCy(0, PRE->Cycles, false);
    Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy)));
    if (PR.SuperIdx) {
      uint64_t Super = ProcResourceMasks[PR.SuperIdx];
      SuperResources[Super] += PRE->Cycles;
    }
  }

  // Sort elements by mask popcount, so that we prioritize resource units over
  // resource groups, and smaller groups over larger groups.
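  // A resource unit mask has exactly one bit set; a group mask sets one bit
  // for the group itself plus one bit per member unit. For example, with
  // hypothetical unit masks 0b0001 and 0b0010, a group containing both units
  // could be encoded as 0b0111 (group bit 0b0100 plus the two unit bits), so
  // units always sort before the groups that contain them.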
  llvm::sort(Worklist.begin(), Worklist.end(),
             [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) {
               unsigned popcntA = countPopulation(A.first);
               unsigned popcntB = countPopulation(B.first);
               if (popcntA < popcntB)
                 return true;
               if (popcntA > popcntB)
                 return false;
               return A.first < B.first;
             });

  uint64_t UsedResourceUnits = 0;

  // Remove cycles contributed by smaller resources.
  for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
    ResourcePlusCycles &A = Worklist[I];
    if (!A.second.size()) {
      A.second.NumUnits = 0;
      A.second.setReserved();
      ID.Resources.emplace_back(A);
      continue;
    }

    ID.Resources.emplace_back(A);
    uint64_t NormalizedMask = A.first;
    if (countPopulation(A.first) == 1) {
      UsedResourceUnits |= A.first;
    } else {
      // Remove the leading 1 from the resource group mask.
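      // Clearing the group's own bit (its highest set bit) leaves only the
      // bits of the member units, e.g. a hypothetical group mask 0b0111
      // normalizes to 0b0011.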
      NormalizedMask ^= PowerOf2Floor(NormalizedMask);
    }

    for (unsigned J = I + 1; J < E; ++J) {
      ResourcePlusCycles &B = Worklist[J];
      if ((NormalizedMask & B.first) == NormalizedMask) {
        B.second.CS.Subtract(A.second.size() - SuperResources[A.first]);
        if (countPopulation(B.first) > 1)
          B.second.NumUnits++;
      }
    }
  }

  // A SchedWrite may specify a number of cycles in which a resource group
  // is reserved. For example (on target x86; cpu Haswell):
  //
  //  SchedWriteRes<[HWPort0, HWPort1, HWPort01]> {
  //    let ResourceCycles = [2, 2, 3];
  //  }
  //
  // This means:
  // Resource units HWPort0 and HWPort1 are both used for 2cy.
  // Resource group HWPort01 is the union of HWPort0 and HWPort1.
  // Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01
  // will not be usable for 2 entire cycles from instruction issue.
  //
  // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency
  // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an
  // extra delay on top of the 2cy latency.
  // During those extra cycles, HWPort01 is not usable by other instructions.
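  // The loop below implements that policy: if every unit of a group is also
  // consumed individually by this write (i.e. all of the group's unit bits
  // are covered by UsedResourceUnits), the group is marked as reserved for
  // the duration of its cycle segment.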
  for (ResourcePlusCycles &RPC : ID.Resources) {
    if (countPopulation(RPC.first) > 1 && !RPC.second.isReserved()) {
      // Remove the leading 1 from the resource group mask.
      uint64_t Mask = RPC.first ^ PowerOf2Floor(RPC.first);
      if ((Mask & UsedResourceUnits) == Mask)
        RPC.second.setReserved();
    }
  }

  LLVM_DEBUG({
    for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
      dbgs() << "\t\tMask=" << R.first << ", cy=" << R.second.size() << '\n';
    for (const uint64_t R : ID.Buffers)
      dbgs() << "\t\tBuffer Mask=" << R << '\n';
  });
}

static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc,
                              const MCSchedClassDesc &SCDesc,
                              const MCSubtargetInfo &STI) {
  if (MCDesc.isCall()) {
    // We cannot estimate how long this call will take.
    // Artificially set an arbitrarily high latency (100cy).
    ID.MaxLatency = 100U;
    return;
  }

  int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
  // If latency is unknown, then conservatively assume a MaxLatency of 100cy.
  ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(Latency);
}

void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
                                  unsigned SchedClassID) {
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  const MCSchedModel &SM = STI.getSchedModel();
  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);

  // These are for now the (strong) assumptions made by this algorithm:
  // * The number of explicit and implicit register definitions in a MCInst
  //   matches the number of explicit and implicit definitions according to
  //   the opcode descriptor (MCInstrDesc).
  // * Register definitions take precedence over register uses in the operands
  //   list.
  // * If an opcode specifies an optional definition, then the optional
  //   definition is always the last operand in the sequence, and it can be
  //   set to zero (i.e. "no register").
  //
  // These assumptions work quite well for most out-of-order in-tree targets
  // like x86. This is mainly because the vast majority of instructions are
  // expanded to MCInst using a straightforward lowering logic that preserves
  // the ordering of the operands.
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  unsigned NumImplicitDefs = MCDesc.getNumImplicitDefs();
  unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;
  unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
  if (MCDesc.hasOptionalDef())
    TotalDefs++;
  ID.Writes.resize(TotalDefs);
  // Iterate over the operands list, and skip non-register operands.
  // The first NumExplicitDefs register operands are expected to be register
  // definitions.
  unsigned CurrentDef = 0;
  unsigned i = 0;
  for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) {
    const MCOperand &Op = MCI.getOperand(i);
    if (!Op.isReg())
      continue;

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = i;
    if (CurrentDef < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, CurrentDef);
      // Conservatively default to MaxLatency.
      Write.Latency =
          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }
    Write.IsOptionalDef = false;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
    CurrentDef++;
  }

  if (CurrentDef != NumExplicitDefs)
    llvm::report_fatal_error(
        "error: Expected more register operand definitions. ");

  for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
    unsigned Index = NumExplicitDefs + CurrentDef;
    WriteDescriptor &Write = ID.Writes[Index];
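    // Implicit definitions have no corresponding MCInst operand, so the
    // bitwise-NOT of the implicit-def index is stored as a sentinel; a
    // "negative" OpIndex identifies this write as implicit (see
    // isImplicitWrite()).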
    Write.OpIndex = ~CurrentDef;
    Write.RegisterID = MCDesc.getImplicitDefs()[CurrentDef];
    if (Index < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, Index);
      // Conservatively default to MaxLatency.
      Write.Latency =
          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }

    Write.IsOptionalDef = false;
    assert(Write.RegisterID != 0 && "Expected a valid phys register!");
    LLVM_DEBUG({
      dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
             << ", PhysReg=" << MRI.getName(Write.RegisterID)
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  if (MCDesc.hasOptionalDef()) {
    // Always assume that the optional definition is the last operand of the
    // MCInst sequence.
    const MCOperand &Op = MCI.getOperand(MCI.getNumOperands() - 1);
    if (i == MCI.getNumOperands() || !Op.isReg())
      llvm::report_fatal_error(
          "error: expected a register operand for an optional "
          "definition. Instruction has not been correctly analyzed.\n",
          false);

    WriteDescriptor &Write = ID.Writes[TotalDefs - 1];
    Write.OpIndex = MCI.getNumOperands() - 1;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = true;
  }
}

void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
                                 unsigned SchedClassID) {
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  unsigned NumExplicitDefs = MCDesc.getNumDefs();

  // Skip explicit definitions.
  unsigned i = 0;
  for (; i < MCI.getNumOperands() && NumExplicitDefs; ++i) {
    const MCOperand &Op = MCI.getOperand(i);
    if (Op.isReg())
      NumExplicitDefs--;
  }

  if (NumExplicitDefs)
    llvm::report_fatal_error(
        "error: Expected more register operand definitions. ", false);

  unsigned NumExplicitUses = MCI.getNumOperands() - i;
  unsigned NumImplicitUses = MCDesc.getNumImplicitUses();
  if (MCDesc.hasOptionalDef()) {
    assert(NumExplicitUses);
    NumExplicitUses--;
  }
  unsigned TotalUses = NumExplicitUses + NumImplicitUses;
  if (!TotalUses)
    return;

  ID.Reads.resize(TotalUses);
  for (unsigned CurrentUse = 0; CurrentUse < NumExplicitUses; ++CurrentUse) {
    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = i + CurrentUse;
    Read.UseIndex = CurrentUse;
    Read.SchedClassID = SchedClassID;
    LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }

  for (unsigned CurrentUse = 0; CurrentUse < NumImplicitUses; ++CurrentUse) {
    ReadDescriptor &Read = ID.Reads[NumExplicitUses + CurrentUse];
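    // As with implicit writes, the bitwise-NOT of the index marks this read
    // as implicit: a "negative" OpIndex with no corresponding MCInst operand.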
    Read.OpIndex = ~CurrentUse;
    Read.UseIndex = NumExplicitUses + CurrentUse;
    Read.RegisterID = MCDesc.getImplicitUses()[CurrentUse];
    Read.SchedClassID = SchedClassID;
    LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex << ", RegisterID="
                      << MRI.getName(Read.RegisterID) << '\n');
  }
}

const InstrDesc &InstrBuilder::createInstrDescImpl(const MCInst &MCI) {
  assert(STI.getSchedModel().hasInstrSchedModel() &&
         "Itineraries are not yet supported!");

  // Obtain the instruction descriptor from the opcode.
  unsigned short Opcode = MCI.getOpcode();
  const MCInstrDesc &MCDesc = MCII.get(Opcode);
  const MCSchedModel &SM = STI.getSchedModel();

  // Then obtain the scheduling class information from the instruction.
  unsigned SchedClassID = MCDesc.getSchedClass();
  unsigned CPUID = SM.getProcessorID();

  // Try to resolve variant scheduling classes.
  if (SchedClassID) {
    while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
      SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &MCI, CPUID);

    if (!SchedClassID)
      llvm::report_fatal_error("unable to resolve this variant class.");
  }

  // Check if this instruction is supported. Otherwise, report a fatal error.
  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
  if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) {
    std::string ToString;
    llvm::raw_string_ostream OS(ToString);
    WithColor::error() << "found an unsupported instruction in the input"
                       << " assembly sequence.\n";
    MCIP.printInst(&MCI, OS, "", STI);
    OS.flush();

    WithColor::note() << "instruction: " << ToString << '\n';
    llvm::report_fatal_error(
        "Don't know how to analyze unsupported instructions.");
  }

  // Create a new empty descriptor.
  std::unique_ptr<InstrDesc> ID = llvm::make_unique<InstrDesc>();
  ID->NumMicroOps = SCDesc.NumMicroOps;

  if (MCDesc.isCall()) {
    // We don't correctly model calls.
    WithColor::warning() << "found a call in the input assembly sequence.\n";
    WithColor::note() << "call instructions are not correctly modeled. "
                      << "Assume a latency of 100cy.\n";
  }

  if (MCDesc.isReturn()) {
    WithColor::warning() << "found a return instruction in the input"
                         << " assembly sequence.\n";
    WithColor::note() << "program counter updates are ignored.\n";
  }

  ID->MayLoad = MCDesc.mayLoad();
  ID->MayStore = MCDesc.mayStore();
  ID->HasSideEffects = MCDesc.hasUnmodeledSideEffects();

  initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
  computeMaxLatency(*ID, MCDesc, SCDesc, STI);
  populateWrites(*ID, MCI, SchedClassID);
  populateReads(*ID, MCI, SchedClassID);

  LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
  LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');

  // Now add the new descriptor.
  SchedClassID = MCDesc.getSchedClass();
  if (!SM.getSchedClassDesc(SchedClassID)->isVariant()) {
    Descriptors[MCI.getOpcode()] = std::move(ID);
    return *Descriptors[MCI.getOpcode()];
  }

  VariantDescriptors[&MCI] = std::move(ID);
  return *VariantDescriptors[&MCI];
}

const InstrDesc &InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI) {
  if (Descriptors.find_as(MCI.getOpcode()) != Descriptors.end())
    return *Descriptors[MCI.getOpcode()];

  if (VariantDescriptors.find(&MCI) != VariantDescriptors.end())
    return *VariantDescriptors[&MCI];

  return createInstrDescImpl(MCI);
}

std::unique_ptr<Instruction>
InstrBuilder::createInstruction(const MCInst &MCI) {
  const InstrDesc &D = getOrCreateInstrDesc(MCI);
  std::unique_ptr<Instruction> NewIS = llvm::make_unique<Instruction>(D);

  // Initialize Reads first.
  for (const ReadDescriptor &RD : D.Reads) {
    int RegID = -1;
    if (!RD.isImplicitRead()) {
      // Explicit read.
      const MCOperand &Op = MCI.getOperand(RD.OpIndex);
      // Skip non-register operands.
      if (!Op.isReg())
        continue;
      RegID = Op.getReg();
    } else {
      // Implicit read.
      RegID = RD.RegisterID;
    }

    // Skip invalid register operands.
    if (!RegID)
      continue;

    // Okay, this is a register operand. Create a ReadState for it.
    assert(RegID > 0 && "Invalid register ID found!");
    NewIS->getUses().emplace_back(llvm::make_unique<ReadState>(RD, RegID));
  }

  // Early exit if there are no writes.
  if (D.Writes.empty())
    return NewIS;

  // Track register writes that implicitly clear the upper portion of the
  // underlying super-registers using an APInt.
  APInt WriteMask(D.Writes.size(), 0);

  // Now query the MCInstrAnalysis object to obtain information about which
  // register writes implicitly clear the upper portion of a super-register.
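  // For example, on X86 a write to a 32-bit GPR such as EAX implicitly zeroes
  // the upper half of RAX. The bit assigned to each write in WriteMask feeds
  // the ClearsSuperRegs flag of the corresponding WriteState below.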
  MCIA.clearsSuperRegisters(MRI, MCI, WriteMask);

  // Check if this is a dependency breaking instruction.
  if (MCIA.isDependencyBreaking(STI, MCI))
    NewIS->setDependencyBreaking();

  // Initialize writes.
  unsigned WriteIndex = 0;
  for (const WriteDescriptor &WD : D.Writes) {
    unsigned RegID = WD.isImplicitWrite() ? WD.RegisterID
                                          : MCI.getOperand(WD.OpIndex).getReg();
    // Check if this is an optional definition that references NoReg.
    if (WD.IsOptionalDef && !RegID) {
      ++WriteIndex;
      continue;
    }

    assert(RegID && "Expected a valid register ID!");
    NewIS->getDefs().emplace_back(llvm::make_unique<WriteState>(
        WD, RegID, /* ClearsSuperRegs */ WriteMask[WriteIndex]));
    ++WriteIndex;
  }

  return NewIS;
}
} // namespace mca