1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// AMDGPU HSA Metadata Streamer.
12 ///
13 //
14 //===----------------------------------------------------------------------===//
15
16 #include "AMDGPUHSAMetadataStreamer.h"
17 #include "AMDGPU.h"
18 #include "AMDGPUSubtarget.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "SIProgramInfo.h"
21 #include "Utils/AMDGPUBaseInfo.h"
22 #include "llvm/ADT/StringSwitch.h"
23 #include "llvm/IR/Constants.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/Support/raw_ostream.h"
26
27 namespace llvm {
28
29 static cl::opt<bool> DumpHSAMetadata(
30 "amdgpu-dump-hsa-metadata",
31 cl::desc("Dump AMDGPU HSA Metadata"));
32 static cl::opt<bool> VerifyHSAMetadata(
33 "amdgpu-verify-hsa-metadata",
34 cl::desc("Verify AMDGPU HSA Metadata"));
35
36 namespace AMDGPU {
37 namespace HSAMD {
38
dump(StringRef HSAMetadataString) const39 void MetadataStreamer::dump(StringRef HSAMetadataString) const {
40 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
41 }
42
verify(StringRef HSAMetadataString) const43 void MetadataStreamer::verify(StringRef HSAMetadataString) const {
44 errs() << "AMDGPU HSA Metadata Parser Test: ";
45
46 HSAMD::Metadata FromHSAMetadataString;
47 if (fromString(HSAMetadataString, FromHSAMetadataString)) {
48 errs() << "FAIL\n";
49 return;
50 }
51
52 std::string ToHSAMetadataString;
53 if (toString(FromHSAMetadataString, ToHSAMetadataString)) {
54 errs() << "FAIL\n";
55 return;
56 }
57
58 errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL")
59 << '\n';
60 if (HSAMetadataString != ToHSAMetadataString) {
61 errs() << "Original input: " << HSAMetadataString << '\n'
62 << "Produced output: " << ToHSAMetadataString << '\n';
63 }
64 }
65
getAccessQualifier(StringRef AccQual) const66 AccessQualifier MetadataStreamer::getAccessQualifier(StringRef AccQual) const {
67 if (AccQual.empty())
68 return AccessQualifier::Unknown;
69
70 return StringSwitch<AccessQualifier>(AccQual)
71 .Case("read_only", AccessQualifier::ReadOnly)
72 .Case("write_only", AccessQualifier::WriteOnly)
73 .Case("read_write", AccessQualifier::ReadWrite)
74 .Default(AccessQualifier::Default);
75 }
76
getAddressSpaceQualifer(unsigned AddressSpace) const77 AddressSpaceQualifier MetadataStreamer::getAddressSpaceQualifer(
78 unsigned AddressSpace) const {
79 if (AddressSpace == AMDGPUASI.PRIVATE_ADDRESS)
80 return AddressSpaceQualifier::Private;
81 if (AddressSpace == AMDGPUASI.GLOBAL_ADDRESS)
82 return AddressSpaceQualifier::Global;
83 if (AddressSpace == AMDGPUASI.CONSTANT_ADDRESS)
84 return AddressSpaceQualifier::Constant;
85 if (AddressSpace == AMDGPUASI.LOCAL_ADDRESS)
86 return AddressSpaceQualifier::Local;
87 if (AddressSpace == AMDGPUASI.FLAT_ADDRESS)
88 return AddressSpaceQualifier::Generic;
89 if (AddressSpace == AMDGPUASI.REGION_ADDRESS)
90 return AddressSpaceQualifier::Region;
91
92 llvm_unreachable("Unknown address space qualifier");
93 }
94
getValueKind(Type * Ty,StringRef TypeQual,StringRef BaseTypeName) const95 ValueKind MetadataStreamer::getValueKind(Type *Ty, StringRef TypeQual,
96 StringRef BaseTypeName) const {
97 if (TypeQual.find("pipe") != StringRef::npos)
98 return ValueKind::Pipe;
99
100 return StringSwitch<ValueKind>(BaseTypeName)
101 .Case("image1d_t", ValueKind::Image)
102 .Case("image1d_array_t", ValueKind::Image)
103 .Case("image1d_buffer_t", ValueKind::Image)
104 .Case("image2d_t", ValueKind::Image)
105 .Case("image2d_array_t", ValueKind::Image)
106 .Case("image2d_array_depth_t", ValueKind::Image)
107 .Case("image2d_array_msaa_t", ValueKind::Image)
108 .Case("image2d_array_msaa_depth_t", ValueKind::Image)
109 .Case("image2d_depth_t", ValueKind::Image)
110 .Case("image2d_msaa_t", ValueKind::Image)
111 .Case("image2d_msaa_depth_t", ValueKind::Image)
112 .Case("image3d_t", ValueKind::Image)
113 .Case("sampler_t", ValueKind::Sampler)
114 .Case("queue_t", ValueKind::Queue)
115 .Default(isa<PointerType>(Ty) ?
116 (Ty->getPointerAddressSpace() ==
117 AMDGPUASI.LOCAL_ADDRESS ?
118 ValueKind::DynamicSharedPointer :
119 ValueKind::GlobalBuffer) :
120 ValueKind::ByValue);
121 }
122
getValueType(Type * Ty,StringRef TypeName) const123 ValueType MetadataStreamer::getValueType(Type *Ty, StringRef TypeName) const {
124 switch (Ty->getTypeID()) {
125 case Type::IntegerTyID: {
126 auto Signed = !TypeName.startswith("u");
127 switch (Ty->getIntegerBitWidth()) {
128 case 8:
129 return Signed ? ValueType::I8 : ValueType::U8;
130 case 16:
131 return Signed ? ValueType::I16 : ValueType::U16;
132 case 32:
133 return Signed ? ValueType::I32 : ValueType::U32;
134 case 64:
135 return Signed ? ValueType::I64 : ValueType::U64;
136 default:
137 return ValueType::Struct;
138 }
139 }
140 case Type::HalfTyID:
141 return ValueType::F16;
142 case Type::FloatTyID:
143 return ValueType::F32;
144 case Type::DoubleTyID:
145 return ValueType::F64;
146 case Type::PointerTyID:
147 return getValueType(Ty->getPointerElementType(), TypeName);
148 case Type::VectorTyID:
149 return getValueType(Ty->getVectorElementType(), TypeName);
150 default:
151 return ValueType::Struct;
152 }
153 }
154
getTypeName(Type * Ty,bool Signed) const155 std::string MetadataStreamer::getTypeName(Type *Ty, bool Signed) const {
156 switch (Ty->getTypeID()) {
157 case Type::IntegerTyID: {
158 if (!Signed)
159 return (Twine('u') + getTypeName(Ty, true)).str();
160
161 auto BitWidth = Ty->getIntegerBitWidth();
162 switch (BitWidth) {
163 case 8:
164 return "char";
165 case 16:
166 return "short";
167 case 32:
168 return "int";
169 case 64:
170 return "long";
171 default:
172 return (Twine('i') + Twine(BitWidth)).str();
173 }
174 }
175 case Type::HalfTyID:
176 return "half";
177 case Type::FloatTyID:
178 return "float";
179 case Type::DoubleTyID:
180 return "double";
181 case Type::VectorTyID: {
182 auto VecTy = cast<VectorType>(Ty);
183 auto ElTy = VecTy->getElementType();
184 auto NumElements = VecTy->getVectorNumElements();
185 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
186 }
187 default:
188 return "unknown";
189 }
190 }
191
getWorkGroupDimensions(MDNode * Node) const192 std::vector<uint32_t> MetadataStreamer::getWorkGroupDimensions(
193 MDNode *Node) const {
194 std::vector<uint32_t> Dims;
195 if (Node->getNumOperands() != 3)
196 return Dims;
197
198 for (auto &Op : Node->operands())
199 Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
200 return Dims;
201 }
202
getHSACodeProps(const MachineFunction & MF,const SIProgramInfo & ProgramInfo) const203 Kernel::CodeProps::Metadata MetadataStreamer::getHSACodeProps(
204 const MachineFunction &MF,
205 const SIProgramInfo &ProgramInfo) const {
206 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
207 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
208 HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
209 const Function &F = MF.getFunction();
210
211 assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
212 F.getCallingConv() == CallingConv::SPIR_KERNEL);
213
214 unsigned MaxKernArgAlign;
215 HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F,
216 MaxKernArgAlign);
217 HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
218 HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
219 HSACodeProps.mKernargSegmentAlign = std::max(MaxKernArgAlign, 4u);
220 HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
221 HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
222 HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
223 HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
224 HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
225 HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
226 HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
227 HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
228
229 return HSACodeProps;
230 }
231
getHSADebugProps(const MachineFunction & MF,const SIProgramInfo & ProgramInfo) const232 Kernel::DebugProps::Metadata MetadataStreamer::getHSADebugProps(
233 const MachineFunction &MF,
234 const SIProgramInfo &ProgramInfo) const {
235 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
236 HSAMD::Kernel::DebugProps::Metadata HSADebugProps;
237
238 if (!STM.debuggerSupported())
239 return HSADebugProps;
240
241 HSADebugProps.mDebuggerABIVersion.push_back(1);
242 HSADebugProps.mDebuggerABIVersion.push_back(0);
243
244 if (STM.debuggerEmitPrologue()) {
245 HSADebugProps.mPrivateSegmentBufferSGPR =
246 ProgramInfo.DebuggerPrivateSegmentBufferSGPR;
247 HSADebugProps.mWavefrontPrivateSegmentOffsetSGPR =
248 ProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
249 }
250
251 return HSADebugProps;
252 }
253
emitVersion()254 void MetadataStreamer::emitVersion() {
255 auto &Version = HSAMetadata.mVersion;
256
257 Version.push_back(VersionMajor);
258 Version.push_back(VersionMinor);
259 }
260
emitPrintf(const Module & Mod)261 void MetadataStreamer::emitPrintf(const Module &Mod) {
262 auto &Printf = HSAMetadata.mPrintf;
263
264 auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
265 if (!Node)
266 return;
267
268 for (auto Op : Node->operands())
269 if (Op->getNumOperands())
270 Printf.push_back(cast<MDString>(Op->getOperand(0))->getString());
271 }
272
emitKernelLanguage(const Function & Func)273 void MetadataStreamer::emitKernelLanguage(const Function &Func) {
274 auto &Kernel = HSAMetadata.mKernels.back();
275
276 // TODO: What about other languages?
277 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
278 if (!Node || !Node->getNumOperands())
279 return;
280 auto Op0 = Node->getOperand(0);
281 if (Op0->getNumOperands() <= 1)
282 return;
283
284 Kernel.mLanguage = "OpenCL C";
285 Kernel.mLanguageVersion.push_back(
286 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue());
287 Kernel.mLanguageVersion.push_back(
288 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
289 }
290
emitKernelAttrs(const Function & Func)291 void MetadataStreamer::emitKernelAttrs(const Function &Func) {
292 auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
293
294 if (auto Node = Func.getMetadata("reqd_work_group_size"))
295 Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node);
296 if (auto Node = Func.getMetadata("work_group_size_hint"))
297 Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node);
298 if (auto Node = Func.getMetadata("vec_type_hint")) {
299 Attrs.mVecTypeHint = getTypeName(
300 cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
301 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
302 }
303 if (Func.hasFnAttribute("runtime-handle")) {
304 Attrs.mRuntimeHandle =
305 Func.getFnAttribute("runtime-handle").getValueAsString().str();
306 }
307 }
308
emitKernelArgs(const Function & Func)309 void MetadataStreamer::emitKernelArgs(const Function &Func) {
310 for (auto &Arg : Func.args())
311 emitKernelArg(Arg);
312
313 emitHiddenKernelArgs(Func);
314 }
315
emitKernelArg(const Argument & Arg)316 void MetadataStreamer::emitKernelArg(const Argument &Arg) {
317 auto Func = Arg.getParent();
318 auto ArgNo = Arg.getArgNo();
319 const MDNode *Node;
320
321 StringRef Name;
322 Node = Func->getMetadata("kernel_arg_name");
323 if (Node && ArgNo < Node->getNumOperands())
324 Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
325 else if (Arg.hasName())
326 Name = Arg.getName();
327
328 StringRef TypeName;
329 Node = Func->getMetadata("kernel_arg_type");
330 if (Node && ArgNo < Node->getNumOperands())
331 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
332
333 StringRef BaseTypeName;
334 Node = Func->getMetadata("kernel_arg_base_type");
335 if (Node && ArgNo < Node->getNumOperands())
336 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
337
338 StringRef AccQual;
339 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
340 Arg.hasNoAliasAttr()) {
341 AccQual = "read_only";
342 } else {
343 Node = Func->getMetadata("kernel_arg_access_qual");
344 if (Node && ArgNo < Node->getNumOperands())
345 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
346 }
347
348 StringRef TypeQual;
349 Node = Func->getMetadata("kernel_arg_type_qual");
350 if (Node && ArgNo < Node->getNumOperands())
351 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
352
353 Type *Ty = Arg.getType();
354 const DataLayout &DL = Func->getParent()->getDataLayout();
355
356 unsigned PointeeAlign = 0;
357 if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
358 if (PtrTy->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS) {
359 PointeeAlign = Arg.getParamAlignment();
360 if (PointeeAlign == 0)
361 PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType());
362 }
363 }
364
365 emitKernelArg(DL, Ty, getValueKind(Arg.getType(), TypeQual, BaseTypeName),
366 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
367 }
368
emitKernelArg(const DataLayout & DL,Type * Ty,ValueKind ValueKind,unsigned PointeeAlign,StringRef Name,StringRef TypeName,StringRef BaseTypeName,StringRef AccQual,StringRef TypeQual)369 void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty,
370 ValueKind ValueKind,
371 unsigned PointeeAlign,
372 StringRef Name,
373 StringRef TypeName, StringRef BaseTypeName,
374 StringRef AccQual, StringRef TypeQual) {
375 HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
376 auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
377
378 Arg.mName = Name;
379 Arg.mTypeName = TypeName;
380 Arg.mSize = DL.getTypeAllocSize(Ty);
381 Arg.mAlign = DL.getABITypeAlignment(Ty);
382 Arg.mValueKind = ValueKind;
383 Arg.mValueType = getValueType(Ty, BaseTypeName);
384 Arg.mPointeeAlign = PointeeAlign;
385
386 if (auto PtrTy = dyn_cast<PointerType>(Ty))
387 Arg.mAddrSpaceQual = getAddressSpaceQualifer(PtrTy->getAddressSpace());
388
389 Arg.mAccQual = getAccessQualifier(AccQual);
390
391 // TODO: Emit Arg.mActualAccQual.
392
393 SmallVector<StringRef, 1> SplitTypeQuals;
394 TypeQual.split(SplitTypeQuals, " ", -1, false);
395 for (StringRef Key : SplitTypeQuals) {
396 auto P = StringSwitch<bool*>(Key)
397 .Case("const", &Arg.mIsConst)
398 .Case("restrict", &Arg.mIsRestrict)
399 .Case("volatile", &Arg.mIsVolatile)
400 .Case("pipe", &Arg.mIsPipe)
401 .Default(nullptr);
402 if (P)
403 *P = true;
404 }
405 }
406
emitHiddenKernelArgs(const Function & Func)407 void MetadataStreamer::emitHiddenKernelArgs(const Function &Func) {
408 int HiddenArgNumBytes =
409 getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
410
411 if (!HiddenArgNumBytes)
412 return;
413
414 auto &DL = Func.getParent()->getDataLayout();
415 auto Int64Ty = Type::getInt64Ty(Func.getContext());
416
417 if (HiddenArgNumBytes >= 8)
418 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX);
419 if (HiddenArgNumBytes >= 16)
420 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY);
421 if (HiddenArgNumBytes >= 24)
422 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ);
423
424 auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
425 AMDGPUASI.GLOBAL_ADDRESS);
426
427 // Emit "printf buffer" argument if printf is used, otherwise emit dummy
428 // "none" argument.
429 if (HiddenArgNumBytes >= 32) {
430 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
431 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer);
432 else
433 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
434 }
435
436 // Emit "default queue" and "completion action" arguments if enqueue kernel is
437 // used, otherwise emit dummy "none" arguments.
438 if (HiddenArgNumBytes >= 48) {
439 if (Func.hasFnAttribute("calls-enqueue-kernel")) {
440 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenDefaultQueue);
441 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenCompletionAction);
442 } else {
443 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
444 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
445 }
446 }
447 }
448
begin(const Module & Mod)449 void MetadataStreamer::begin(const Module &Mod) {
450 AMDGPUASI = getAMDGPUAS(Mod);
451 emitVersion();
452 emitPrintf(Mod);
453 }
454
end()455 void MetadataStreamer::end() {
456 std::string HSAMetadataString;
457 if (toString(HSAMetadata, HSAMetadataString))
458 return;
459
460 if (DumpHSAMetadata)
461 dump(HSAMetadataString);
462 if (VerifyHSAMetadata)
463 verify(HSAMetadataString);
464 }
465
emitKernel(const MachineFunction & MF,const SIProgramInfo & ProgramInfo)466 void MetadataStreamer::emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) {
467 auto &Func = MF.getFunction();
468 if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
469 return;
470
471 auto CodeProps = getHSACodeProps(MF, ProgramInfo);
472 auto DebugProps = getHSADebugProps(MF, ProgramInfo);
473
474 HSAMetadata.mKernels.push_back(Kernel::Metadata());
475 auto &Kernel = HSAMetadata.mKernels.back();
476
477 Kernel.mName = Func.getName();
478 Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str();
479 emitKernelLanguage(Func);
480 emitKernelAttrs(Func);
481 emitKernelArgs(Func);
482 HSAMetadata.mKernels.back().mCodeProps = CodeProps;
483 HSAMetadata.mKernels.back().mDebugProps = DebugProps;
484 }
485
486 } // end namespace HSAMD
487 } // end namespace AMDGPU
488 } // end namespace llvm
489