1 //===--- AMDGPU.cpp - AMDGPU ToolChain Implementations ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "AMDGPU.h"
10 #include "CommonArgs.h"
11 #include "InputInfo.h"
12 #include "clang/Basic/TargetID.h"
13 #include "clang/Driver/Compilation.h"
14 #include "clang/Driver/DriverDiagnostic.h"
15 #include "llvm/Option/ArgList.h"
16 #include "llvm/Support/Path.h"
17 #include "llvm/Support/VirtualFileSystem.h"
18
19 using namespace clang::driver;
20 using namespace clang::driver::tools;
21 using namespace clang::driver::toolchains;
22 using namespace clang;
23 using namespace llvm::opt;
24
scanLibDevicePath(llvm::StringRef Path)25 void RocmInstallationDetector::scanLibDevicePath(llvm::StringRef Path) {
26 assert(!Path.empty());
27
28 const StringRef Suffix(".bc");
29 const StringRef Suffix2(".amdgcn.bc");
30
31 std::error_code EC;
32 for (llvm::vfs::directory_iterator LI = D.getVFS().dir_begin(Path, EC), LE;
33 !EC && LI != LE; LI = LI.increment(EC)) {
34 StringRef FilePath = LI->path();
35 StringRef FileName = llvm::sys::path::filename(FilePath);
36 if (!FileName.endswith(Suffix))
37 continue;
38
39 StringRef BaseName;
40 if (FileName.endswith(Suffix2))
41 BaseName = FileName.drop_back(Suffix2.size());
42 else if (FileName.endswith(Suffix))
43 BaseName = FileName.drop_back(Suffix.size());
44
45 if (BaseName == "ocml") {
46 OCML = FilePath;
47 } else if (BaseName == "ockl") {
48 OCKL = FilePath;
49 } else if (BaseName == "opencl") {
50 OpenCL = FilePath;
51 } else if (BaseName == "hip") {
52 HIP = FilePath;
53 } else if (BaseName == "oclc_finite_only_off") {
54 FiniteOnly.Off = FilePath;
55 } else if (BaseName == "oclc_finite_only_on") {
56 FiniteOnly.On = FilePath;
57 } else if (BaseName == "oclc_daz_opt_on") {
58 DenormalsAreZero.On = FilePath;
59 } else if (BaseName == "oclc_daz_opt_off") {
60 DenormalsAreZero.Off = FilePath;
61 } else if (BaseName == "oclc_correctly_rounded_sqrt_on") {
62 CorrectlyRoundedSqrt.On = FilePath;
63 } else if (BaseName == "oclc_correctly_rounded_sqrt_off") {
64 CorrectlyRoundedSqrt.Off = FilePath;
65 } else if (BaseName == "oclc_unsafe_math_on") {
66 UnsafeMath.On = FilePath;
67 } else if (BaseName == "oclc_unsafe_math_off") {
68 UnsafeMath.Off = FilePath;
69 } else if (BaseName == "oclc_wavefrontsize64_on") {
70 WavefrontSize64.On = FilePath;
71 } else if (BaseName == "oclc_wavefrontsize64_off") {
72 WavefrontSize64.Off = FilePath;
73 } else {
74 // Process all bitcode filenames that look like
75 // ocl_isa_version_XXX.amdgcn.bc
76 const StringRef DeviceLibPrefix = "oclc_isa_version_";
77 if (!BaseName.startswith(DeviceLibPrefix))
78 continue;
79
80 StringRef IsaVersionNumber =
81 BaseName.drop_front(DeviceLibPrefix.size());
82
83 llvm::Twine GfxName = Twine("gfx") + IsaVersionNumber;
84 SmallString<8> Tmp;
85 LibDeviceMap.insert(
86 std::make_pair(GfxName.toStringRef(Tmp), FilePath.str()));
87 }
88 }
89 }
90
ParseHIPVersionFile(llvm::StringRef V)91 void RocmInstallationDetector::ParseHIPVersionFile(llvm::StringRef V) {
92 SmallVector<StringRef, 4> VersionParts;
93 V.split(VersionParts, '\n');
94 unsigned Major;
95 unsigned Minor;
96 for (auto Part : VersionParts) {
97 auto Splits = Part.split('=');
98 if (Splits.first == "HIP_VERSION_MAJOR")
99 Splits.second.getAsInteger(0, Major);
100 else if (Splits.first == "HIP_VERSION_MINOR")
101 Splits.second.getAsInteger(0, Minor);
102 else if (Splits.first == "HIP_VERSION_PATCH")
103 VersionPatch = Splits.second.str();
104 }
105 VersionMajorMinor = llvm::VersionTuple(Major, Minor);
106 DetectedVersion =
107 (Twine(Major) + "." + Twine(Minor) + "." + VersionPatch).str();
108 }
109
110 // For candidate specified by --rocm-path we do not do strict check.
111 SmallVector<RocmInstallationDetector::Candidate, 4>
getInstallationPathCandidates()112 RocmInstallationDetector::getInstallationPathCandidates() {
113 SmallVector<Candidate, 4> Candidates;
114 if (!RocmPathArg.empty()) {
115 Candidates.emplace_back(RocmPathArg.str());
116 return Candidates;
117 }
118
119 // Try to find relative to the compiler binary.
120 const char *InstallDir = D.getInstalledDir();
121
122 // Check both a normal Unix prefix position of the clang binary, as well as
123 // the Windows-esque layout the ROCm packages use with the host architecture
124 // subdirectory of bin.
125
126 // Strip off directory (usually bin)
127 StringRef ParentDir = llvm::sys::path::parent_path(InstallDir);
128 StringRef ParentName = llvm::sys::path::filename(ParentDir);
129
130 // Some builds use bin/{host arch}, so go up again.
131 if (ParentName == "bin") {
132 ParentDir = llvm::sys::path::parent_path(ParentDir);
133 ParentName = llvm::sys::path::filename(ParentDir);
134 }
135
136 // Some versions of the rocm llvm package install to /opt/rocm/llvm/bin
137 if (ParentName == "llvm")
138 ParentDir = llvm::sys::path::parent_path(ParentDir);
139
140 Candidates.emplace_back(ParentDir.str(), /*StrictChecking=*/true);
141
142 // Device library may be installed in clang resource directory.
143 Candidates.emplace_back(D.ResourceDir, /*StrictChecking=*/true);
144
145 Candidates.emplace_back(D.SysRoot + "/opt/rocm", /*StrictChecking=*/true);
146 return Candidates;
147 }
148
RocmInstallationDetector(const Driver & D,const llvm::Triple & HostTriple,const llvm::opt::ArgList & Args,bool DetectHIPRuntime,bool DetectDeviceLib)149 RocmInstallationDetector::RocmInstallationDetector(
150 const Driver &D, const llvm::Triple &HostTriple,
151 const llvm::opt::ArgList &Args, bool DetectHIPRuntime, bool DetectDeviceLib)
152 : D(D) {
153 RocmPathArg = Args.getLastArgValue(clang::driver::options::OPT_rocm_path_EQ);
154 RocmDeviceLibPathArg =
155 Args.getAllArgValues(clang::driver::options::OPT_rocm_device_lib_path_EQ);
156 if (auto *A = Args.getLastArg(clang::driver::options::OPT_hip_version_EQ)) {
157 HIPVersionArg = A->getValue();
158 unsigned Major = 0;
159 unsigned Minor = 0;
160 SmallVector<StringRef, 3> Parts;
161 HIPVersionArg.split(Parts, '.');
162 if (Parts.size())
163 Parts[0].getAsInteger(0, Major);
164 if (Parts.size() > 1)
165 Parts[1].getAsInteger(0, Minor);
166 if (Parts.size() > 2)
167 VersionPatch = Parts[2].str();
168 if (VersionPatch.empty())
169 VersionPatch = "0";
170 if (Major == 0 || Minor == 0)
171 D.Diag(diag::err_drv_invalid_value)
172 << A->getAsString(Args) << HIPVersionArg;
173
174 VersionMajorMinor = llvm::VersionTuple(Major, Minor);
175 DetectedVersion =
176 (Twine(Major) + "." + Twine(Minor) + "." + VersionPatch).str();
177 } else {
178 VersionPatch = DefaultVersionPatch;
179 VersionMajorMinor =
180 llvm::VersionTuple(DefaultVersionMajor, DefaultVersionMinor);
181 DetectedVersion = (Twine(DefaultVersionMajor) + "." +
182 Twine(DefaultVersionMinor) + "." + VersionPatch)
183 .str();
184 }
185
186 if (DetectHIPRuntime)
187 detectHIPRuntime();
188 if (DetectDeviceLib)
189 detectDeviceLibrary();
190 }
191
detectDeviceLibrary()192 void RocmInstallationDetector::detectDeviceLibrary() {
193 assert(LibDevicePath.empty());
194
195 if (!RocmDeviceLibPathArg.empty())
196 LibDevicePath = RocmDeviceLibPathArg[RocmDeviceLibPathArg.size() - 1];
197 else if (const char *LibPathEnv = ::getenv("HIP_DEVICE_LIB_PATH"))
198 LibDevicePath = LibPathEnv;
199
200 auto &FS = D.getVFS();
201 if (!LibDevicePath.empty()) {
202 // Maintain compatability with HIP flag/envvar pointing directly at the
203 // bitcode library directory. This points directly at the library path instead
204 // of the rocm root installation.
205 if (!FS.exists(LibDevicePath))
206 return;
207
208 scanLibDevicePath(LibDevicePath);
209 HasDeviceLibrary = allGenericLibsValid() && !LibDeviceMap.empty();
210 return;
211 }
212
213 // The install path situation in old versions of ROCm is a real mess, and
214 // use a different install layout. Multiple copies of the device libraries
215 // exist for each frontend project, and differ depending on which build
216 // system produced the packages. Standalone OpenCL builds also have a
217 // different directory structure from the ROCm OpenCL package.
218 auto Candidates = getInstallationPathCandidates();
219 for (const auto &Candidate : Candidates) {
220 auto CandidatePath = Candidate.Path;
221
222 // Check device library exists at the given path.
223 auto CheckDeviceLib = [&](StringRef Path) {
224 bool CheckLibDevice = (!NoBuiltinLibs || Candidate.StrictChecking);
225 if (CheckLibDevice && !FS.exists(Path))
226 return false;
227
228 scanLibDevicePath(Path);
229
230 if (!NoBuiltinLibs) {
231 // Check that the required non-target libraries are all available.
232 if (!allGenericLibsValid())
233 return false;
234
235 // Check that we have found at least one libdevice that we can link in
236 // if -nobuiltinlib hasn't been specified.
237 if (LibDeviceMap.empty())
238 return false;
239 }
240 return true;
241 };
242
243 // The possible structures are:
244 // - ${ROCM_ROOT}/amdgcn/bitcode/*
245 // - ${ROCM_ROOT}/lib/*
246 // - ${ROCM_ROOT}/lib/bitcode/*
247 // so try to detect these layouts.
248 static constexpr std::array<const char *, 2> SubDirsList[] = {
249 {"amdgcn", "bitcode"},
250 {"lib", ""},
251 {"lib", "bitcode"},
252 };
253
254 // Make a path by appending sub-directories to InstallPath.
255 auto MakePath = [&](const llvm::ArrayRef<const char *> &SubDirs) {
256 auto Path = CandidatePath;
257 for (auto SubDir : SubDirs)
258 llvm::sys::path::append(Path, SubDir);
259 return Path;
260 };
261
262 for (auto SubDirs : SubDirsList) {
263 LibDevicePath = MakePath(SubDirs);
264 HasDeviceLibrary = CheckDeviceLib(LibDevicePath);
265 if (HasDeviceLibrary)
266 return;
267 }
268 }
269 }
270
detectHIPRuntime()271 void RocmInstallationDetector::detectHIPRuntime() {
272 auto Candidates = getInstallationPathCandidates();
273 auto &FS = D.getVFS();
274
275 for (const auto &Candidate : Candidates) {
276 InstallPath = Candidate.Path;
277 if (InstallPath.empty() || !FS.exists(InstallPath))
278 continue;
279
280 BinPath = InstallPath;
281 llvm::sys::path::append(BinPath, "bin");
282 IncludePath = InstallPath;
283 llvm::sys::path::append(IncludePath, "include");
284 LibPath = InstallPath;
285 llvm::sys::path::append(LibPath, "lib");
286
287 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
288 FS.getBufferForFile(BinPath + "/.hipVersion");
289 if (!VersionFile && Candidate.StrictChecking)
290 continue;
291
292 if (HIPVersionArg.empty() && VersionFile)
293 ParseHIPVersionFile((*VersionFile)->getBuffer());
294
295 HasHIPRuntime = true;
296 return;
297 }
298 HasHIPRuntime = false;
299 }
300
print(raw_ostream & OS) const301 void RocmInstallationDetector::print(raw_ostream &OS) const {
302 if (hasHIPRuntime())
303 OS << "Found HIP installation: " << InstallPath << ", version "
304 << DetectedVersion << '\n';
305 }
306
AddHIPIncludeArgs(const ArgList & DriverArgs,ArgStringList & CC1Args) const307 void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList &DriverArgs,
308 ArgStringList &CC1Args) const {
309 bool UsesRuntimeWrapper = VersionMajorMinor > llvm::VersionTuple(3, 5);
310
311 if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
312 // HIP header includes standard library wrapper headers under clang
313 // cuda_wrappers directory. Since these wrapper headers include_next
314 // standard C++ headers, whereas libc++ headers include_next other clang
315 // headers. The include paths have to follow this order:
316 // - wrapper include path
317 // - standard C++ include path
318 // - other clang include path
319 // Since standard C++ and other clang include paths are added in other
320 // places after this function, here we only need to make sure wrapper
321 // include path is added.
322 //
323 // ROCm 3.5 does not fully support the wrapper headers. Therefore it needs
324 // a workaround.
325 SmallString<128> P(D.ResourceDir);
326 if (UsesRuntimeWrapper)
327 llvm::sys::path::append(P, "include", "cuda_wrappers");
328 CC1Args.push_back("-internal-isystem");
329 CC1Args.push_back(DriverArgs.MakeArgString(P));
330 }
331
332 if (DriverArgs.hasArg(options::OPT_nogpuinc))
333 return;
334
335 if (!hasHIPRuntime()) {
336 D.Diag(diag::err_drv_no_hip_runtime);
337 return;
338 }
339
340 CC1Args.push_back("-internal-isystem");
341 CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
342 if (UsesRuntimeWrapper)
343 CC1Args.append({"-include", "__clang_hip_runtime_wrapper.h"});
344 }
345
ConstructJob(Compilation & C,const JobAction & JA,const InputInfo & Output,const InputInfoList & Inputs,const ArgList & Args,const char * LinkingOutput) const346 void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
347 const InputInfo &Output,
348 const InputInfoList &Inputs,
349 const ArgList &Args,
350 const char *LinkingOutput) const {
351
352 std::string Linker = getToolChain().GetProgramPath(getShortName());
353 ArgStringList CmdArgs;
354 addLinkerCompressDebugSectionsOption(getToolChain(), Args, CmdArgs);
355 AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
356 CmdArgs.push_back("-shared");
357 CmdArgs.push_back("-o");
358 CmdArgs.push_back(Output.getFilename());
359 C.addCommand(std::make_unique<Command>(
360 JA, *this, ResponseFileSupport::AtFileCurCP(), Args.MakeArgString(Linker),
361 CmdArgs, Inputs, Output));
362 }
363
getAMDGPUTargetFeatures(const Driver & D,const llvm::Triple & Triple,const llvm::opt::ArgList & Args,std::vector<StringRef> & Features)364 void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
365 const llvm::Triple &Triple,
366 const llvm::opt::ArgList &Args,
367 std::vector<StringRef> &Features) {
368 // Add target ID features to -target-feature options. No diagnostics should
369 // be emitted here since invalid target ID is diagnosed at other places.
370 StringRef TargetID = Args.getLastArgValue(options::OPT_mcpu_EQ);
371 if (!TargetID.empty()) {
372 llvm::StringMap<bool> FeatureMap;
373 auto OptionalGpuArch = parseTargetID(Triple, TargetID, &FeatureMap);
374 if (OptionalGpuArch) {
375 StringRef GpuArch = OptionalGpuArch.getValue();
376 // Iterate through all possible target ID features for the given GPU.
377 // If it is mapped to true, add +feature.
378 // If it is mapped to false, add -feature.
379 // If it is not in the map (default), do not add it
380 for (auto &&Feature : getAllPossibleTargetIDFeatures(Triple, GpuArch)) {
381 auto Pos = FeatureMap.find(Feature);
382 if (Pos == FeatureMap.end())
383 continue;
384 Features.push_back(Args.MakeArgStringRef(
385 (Twine(Pos->second ? "+" : "-") + Feature).str()));
386 }
387 }
388 }
389
390 if (Args.hasFlag(options::OPT_mwavefrontsize64,
391 options::OPT_mno_wavefrontsize64, false))
392 Features.push_back("+wavefrontsize64");
393
394 handleTargetFeaturesGroup(
395 Args, Features, options::OPT_m_amdgpu_Features_Group);
396 }
397
398 /// AMDGPU Toolchain
AMDGPUToolChain(const Driver & D,const llvm::Triple & Triple,const ArgList & Args)399 AMDGPUToolChain::AMDGPUToolChain(const Driver &D, const llvm::Triple &Triple,
400 const ArgList &Args)
401 : Generic_ELF(D, Triple, Args),
402 OptionsDefault(
403 {{options::OPT_O, "3"}, {options::OPT_cl_std_EQ, "CL1.2"}}) {
404 // Check code object version options. Emit warnings for legacy options
405 // and errors for the last invalid code object version options.
406 // It is done here to avoid repeated warning or error messages for
407 // each tool invocation.
408 (void)getOrCheckAMDGPUCodeObjectVersion(D, Args, /*Diagnose=*/true);
409 }
410
buildLinker() const411 Tool *AMDGPUToolChain::buildLinker() const {
412 return new tools::amdgpu::Linker(*this);
413 }
414
415 DerivedArgList *
TranslateArgs(const DerivedArgList & Args,StringRef BoundArch,Action::OffloadKind DeviceOffloadKind) const416 AMDGPUToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch,
417 Action::OffloadKind DeviceOffloadKind) const {
418
419 DerivedArgList *DAL =
420 Generic_ELF::TranslateArgs(Args, BoundArch, DeviceOffloadKind);
421
422 const OptTable &Opts = getDriver().getOpts();
423
424 if (!DAL)
425 DAL = new DerivedArgList(Args.getBaseArgs());
426
427 for (Arg *A : Args) {
428 if (!shouldSkipArgument(A))
429 DAL->append(A);
430 }
431
432 checkTargetID(*DAL);
433
434 if (!Args.getLastArgValue(options::OPT_x).equals("cl"))
435 return DAL;
436
437 // Phase 1 (.cl -> .bc)
438 if (Args.hasArg(options::OPT_c) && Args.hasArg(options::OPT_emit_llvm)) {
439 DAL->AddFlagArg(nullptr, Opts.getOption(getTriple().isArch64Bit()
440 ? options::OPT_m64
441 : options::OPT_m32));
442
443 // Have to check OPT_O4, OPT_O0 & OPT_Ofast separately
444 // as they defined that way in Options.td
445 if (!Args.hasArg(options::OPT_O, options::OPT_O0, options::OPT_O4,
446 options::OPT_Ofast))
447 DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_O),
448 getOptionDefault(options::OPT_O));
449 }
450
451 return DAL;
452 }
453
getDefaultDenormsAreZeroForTarget(llvm::AMDGPU::GPUKind Kind)454 bool AMDGPUToolChain::getDefaultDenormsAreZeroForTarget(
455 llvm::AMDGPU::GPUKind Kind) {
456
457 // Assume nothing without a specific target.
458 if (Kind == llvm::AMDGPU::GK_NONE)
459 return false;
460
461 const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
462
463 // Default to enabling f32 denormals by default on subtargets where fma is
464 // fast with denormals
465 const bool BothDenormAndFMAFast =
466 (ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) &&
467 (ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32);
468 return !BothDenormAndFMAFast;
469 }
470
/// Return the default denormal mode for the floating-point type \p FPType.
///
/// Anything other than IEEE single precision (including a null \p FPType)
/// gets IEEE mode. For single precision the result is preserve-sign when
/// denormals are flushed and IEEE otherwise. HIP/CUDA offloading jobs decide
/// this from the -f[no-]cuda-flush-denormals-to-zero flag; other jobs decide
/// it from -cl-denorms-are-zero. Both fall back to the target default.
llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType(
    const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
    const llvm::fltSemantics *FPType) const {
  // Denormals should always be enabled for f16 and f64.
  if (FPType != &llvm::APFloat::IEEEsingle())
    return llvm::DenormalMode::getIEEE();

  const auto OffloadKind = JA.getOffloadingDeviceKind();
  if (OffloadKind == Action::OFK_HIP || OffloadKind == Action::OFK_Cuda) {
    const auto Kind = llvm::AMDGPU::parseArchAMDGCN(
        getProcessorFromTargetID(getTriple(), JA.getOffloadingArch()));
    const bool FlushDenormals =
        DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
                           options::OPT_fno_cuda_flush_denormals_to_zero,
                           getDefaultDenormsAreZeroForTarget(Kind));
    return FlushDenormals ? llvm::DenormalMode::getPreserveSign()
                          : llvm::DenormalMode::getIEEE();
  }

  const auto Kind = llvm::AMDGPU::parseArchAMDGCN(getGPUArch(DriverArgs));

  // TODO: There are way too many flags that change this. Do we need to check
  // them all?
  if (DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
      getDefaultDenormsAreZeroForTarget(Kind)) {
    // Outputs are flushed to zero (FTZ), preserving sign. Denormal inputs
    // are also implicitly treated as zero (DAZ).
    return llvm::DenormalMode::getPreserveSign();
  }
  return llvm::DenormalMode::getIEEE();
}
504
isWave64(const llvm::opt::ArgList & DriverArgs,llvm::AMDGPU::GPUKind Kind)505 bool AMDGPUToolChain::isWave64(const llvm::opt::ArgList &DriverArgs,
506 llvm::AMDGPU::GPUKind Kind) {
507 const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
508 bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32);
509
510 return !HasWave32 || DriverArgs.hasFlag(
511 options::OPT_mwavefrontsize64, options::OPT_mno_wavefrontsize64, false);
512 }
513
514
515 /// ROCM Toolchain
ROCMToolChain(const Driver & D,const llvm::Triple & Triple,const ArgList & Args)516 ROCMToolChain::ROCMToolChain(const Driver &D, const llvm::Triple &Triple,
517 const ArgList &Args)
518 : AMDGPUToolChain(D, Triple, Args) {
519 RocmInstallation.detectDeviceLibrary();
520 }
521
addClangTargetOptions(const llvm::opt::ArgList & DriverArgs,llvm::opt::ArgStringList & CC1Args,Action::OffloadKind DeviceOffloadingKind) const522 void AMDGPUToolChain::addClangTargetOptions(
523 const llvm::opt::ArgList &DriverArgs,
524 llvm::opt::ArgStringList &CC1Args,
525 Action::OffloadKind DeviceOffloadingKind) const {
526 // Default to "hidden" visibility, as object level linking will not be
527 // supported for the foreseeable future.
528 if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ,
529 options::OPT_fvisibility_ms_compat)) {
530 CC1Args.push_back("-fvisibility");
531 CC1Args.push_back("hidden");
532 CC1Args.push_back("-fapply-global-visibility-to-externs");
533 }
534 }
535
536 StringRef
getGPUArch(const llvm::opt::ArgList & DriverArgs) const537 AMDGPUToolChain::getGPUArch(const llvm::opt::ArgList &DriverArgs) const {
538 return getProcessorFromTargetID(
539 getTriple(), DriverArgs.getLastArgValue(options::OPT_mcpu_EQ));
540 }
541
checkTargetID(const llvm::opt::ArgList & DriverArgs) const542 void AMDGPUToolChain::checkTargetID(
543 const llvm::opt::ArgList &DriverArgs) const {
544 StringRef TargetID = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
545 if (TargetID.empty())
546 return;
547
548 llvm::StringMap<bool> FeatureMap;
549 auto OptionalGpuArch = parseTargetID(getTriple(), TargetID, &FeatureMap);
550 if (!OptionalGpuArch) {
551 getDriver().Diag(clang::diag::err_drv_bad_target_id) << TargetID;
552 }
553 }
554
addClangTargetOptions(const llvm::opt::ArgList & DriverArgs,llvm::opt::ArgStringList & CC1Args,Action::OffloadKind DeviceOffloadingKind) const555 void ROCMToolChain::addClangTargetOptions(
556 const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
557 Action::OffloadKind DeviceOffloadingKind) const {
558 AMDGPUToolChain::addClangTargetOptions(DriverArgs, CC1Args,
559 DeviceOffloadingKind);
560
561 // For the OpenCL case where there is no offload target, accept -nostdlib to
562 // disable bitcode linking.
563 if (DeviceOffloadingKind == Action::OFK_None &&
564 DriverArgs.hasArg(options::OPT_nostdlib))
565 return;
566
567 if (DriverArgs.hasArg(options::OPT_nogpulib))
568 return;
569
570 if (!RocmInstallation.hasDeviceLibrary()) {
571 getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 0;
572 return;
573 }
574
575 // Get the device name and canonicalize it
576 const StringRef GpuArch = getGPUArch(DriverArgs);
577 auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
578 const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
579 std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
580 if (LibDeviceFile.empty()) {
581 getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 1 << GpuArch;
582 return;
583 }
584
585 bool Wave64 = isWave64(DriverArgs, Kind);
586
587 // TODO: There are way too many flags that change this. Do we need to check
588 // them all?
589 bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
590 getDefaultDenormsAreZeroForTarget(Kind);
591 bool FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only);
592
593 bool UnsafeMathOpt =
594 DriverArgs.hasArg(options::OPT_cl_unsafe_math_optimizations);
595 bool FastRelaxedMath = DriverArgs.hasArg(options::OPT_cl_fast_relaxed_math);
596 bool CorrectSqrt =
597 DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt);
598
599 // Add the OpenCL specific bitcode library.
600 CC1Args.push_back("-mlink-builtin-bitcode");
601 CC1Args.push_back(DriverArgs.MakeArgString(RocmInstallation.getOpenCLPath()));
602
603 // Add the generic set of libraries.
604 RocmInstallation.addCommonBitcodeLibCC1Args(
605 DriverArgs, CC1Args, LibDeviceFile, Wave64, DAZ, FiniteOnly,
606 UnsafeMathOpt, FastRelaxedMath, CorrectSqrt);
607 }
608
addCommonBitcodeLibCC1Args(const llvm::opt::ArgList & DriverArgs,llvm::opt::ArgStringList & CC1Args,StringRef LibDeviceFile,bool Wave64,bool DAZ,bool FiniteOnly,bool UnsafeMathOpt,bool FastRelaxedMath,bool CorrectSqrt) const609 void RocmInstallationDetector::addCommonBitcodeLibCC1Args(
610 const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
611 StringRef LibDeviceFile, bool Wave64, bool DAZ, bool FiniteOnly,
612 bool UnsafeMathOpt, bool FastRelaxedMath, bool CorrectSqrt) const {
613 static const char LinkBitcodeFlag[] = "-mlink-builtin-bitcode";
614
615 CC1Args.push_back(LinkBitcodeFlag);
616 CC1Args.push_back(DriverArgs.MakeArgString(getOCMLPath()));
617
618 CC1Args.push_back(LinkBitcodeFlag);
619 CC1Args.push_back(DriverArgs.MakeArgString(getOCKLPath()));
620
621 CC1Args.push_back(LinkBitcodeFlag);
622 CC1Args.push_back(DriverArgs.MakeArgString(getDenormalsAreZeroPath(DAZ)));
623
624 CC1Args.push_back(LinkBitcodeFlag);
625 CC1Args.push_back(DriverArgs.MakeArgString(
626 getUnsafeMathPath(UnsafeMathOpt || FastRelaxedMath)));
627
628 CC1Args.push_back(LinkBitcodeFlag);
629 CC1Args.push_back(DriverArgs.MakeArgString(
630 getFiniteOnlyPath(FiniteOnly || FastRelaxedMath)));
631
632 CC1Args.push_back(LinkBitcodeFlag);
633 CC1Args.push_back(
634 DriverArgs.MakeArgString(getCorrectlyRoundedSqrtPath(CorrectSqrt)));
635
636 CC1Args.push_back(LinkBitcodeFlag);
637 CC1Args.push_back(DriverArgs.MakeArgString(getWavefrontSize64Path(Wave64)));
638
639 CC1Args.push_back(LinkBitcodeFlag);
640 CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
641 }
642
shouldSkipArgument(const llvm::opt::Arg * A) const643 bool AMDGPUToolChain::shouldSkipArgument(const llvm::opt::Arg *A) const {
644 Option O = A->getOption();
645 if (O.matches(options::OPT_fPIE) || O.matches(options::OPT_fpie))
646 return true;
647 return false;
648 }
649