• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===--- AMDGPU.cpp - AMDGPU ToolChain Implementations ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "CommonArgs.h"
11 #include "InputInfo.h"
12 #include "clang/Basic/TargetID.h"
13 #include "clang/Driver/Compilation.h"
14 #include "clang/Driver/DriverDiagnostic.h"
15 #include "llvm/Option/ArgList.h"
16 #include "llvm/Support/Path.h"
17 #include "llvm/Support/VirtualFileSystem.h"
18 
19 using namespace clang::driver;
20 using namespace clang::driver::tools;
21 using namespace clang::driver::toolchains;
22 using namespace clang;
23 using namespace llvm::opt;
24 
scanLibDevicePath(llvm::StringRef Path)25 void RocmInstallationDetector::scanLibDevicePath(llvm::StringRef Path) {
26   assert(!Path.empty());
27 
28   const StringRef Suffix(".bc");
29   const StringRef Suffix2(".amdgcn.bc");
30 
31   std::error_code EC;
32   for (llvm::vfs::directory_iterator LI = D.getVFS().dir_begin(Path, EC), LE;
33        !EC && LI != LE; LI = LI.increment(EC)) {
34     StringRef FilePath = LI->path();
35     StringRef FileName = llvm::sys::path::filename(FilePath);
36     if (!FileName.endswith(Suffix))
37       continue;
38 
39     StringRef BaseName;
40     if (FileName.endswith(Suffix2))
41       BaseName = FileName.drop_back(Suffix2.size());
42     else if (FileName.endswith(Suffix))
43       BaseName = FileName.drop_back(Suffix.size());
44 
45     if (BaseName == "ocml") {
46       OCML = FilePath;
47     } else if (BaseName == "ockl") {
48       OCKL = FilePath;
49     } else if (BaseName == "opencl") {
50       OpenCL = FilePath;
51     } else if (BaseName == "hip") {
52       HIP = FilePath;
53     } else if (BaseName == "oclc_finite_only_off") {
54       FiniteOnly.Off = FilePath;
55     } else if (BaseName == "oclc_finite_only_on") {
56       FiniteOnly.On = FilePath;
57     } else if (BaseName == "oclc_daz_opt_on") {
58       DenormalsAreZero.On = FilePath;
59     } else if (BaseName == "oclc_daz_opt_off") {
60       DenormalsAreZero.Off = FilePath;
61     } else if (BaseName == "oclc_correctly_rounded_sqrt_on") {
62       CorrectlyRoundedSqrt.On = FilePath;
63     } else if (BaseName == "oclc_correctly_rounded_sqrt_off") {
64       CorrectlyRoundedSqrt.Off = FilePath;
65     } else if (BaseName == "oclc_unsafe_math_on") {
66       UnsafeMath.On = FilePath;
67     } else if (BaseName == "oclc_unsafe_math_off") {
68       UnsafeMath.Off = FilePath;
69     } else if (BaseName == "oclc_wavefrontsize64_on") {
70       WavefrontSize64.On = FilePath;
71     } else if (BaseName == "oclc_wavefrontsize64_off") {
72       WavefrontSize64.Off = FilePath;
73     } else {
74       // Process all bitcode filenames that look like
75       // ocl_isa_version_XXX.amdgcn.bc
76       const StringRef DeviceLibPrefix = "oclc_isa_version_";
77       if (!BaseName.startswith(DeviceLibPrefix))
78         continue;
79 
80       StringRef IsaVersionNumber =
81         BaseName.drop_front(DeviceLibPrefix.size());
82 
83       llvm::Twine GfxName = Twine("gfx") + IsaVersionNumber;
84       SmallString<8> Tmp;
85       LibDeviceMap.insert(
86         std::make_pair(GfxName.toStringRef(Tmp), FilePath.str()));
87     }
88   }
89 }
90 
ParseHIPVersionFile(llvm::StringRef V)91 void RocmInstallationDetector::ParseHIPVersionFile(llvm::StringRef V) {
92   SmallVector<StringRef, 4> VersionParts;
93   V.split(VersionParts, '\n');
94   unsigned Major;
95   unsigned Minor;
96   for (auto Part : VersionParts) {
97     auto Splits = Part.split('=');
98     if (Splits.first == "HIP_VERSION_MAJOR")
99       Splits.second.getAsInteger(0, Major);
100     else if (Splits.first == "HIP_VERSION_MINOR")
101       Splits.second.getAsInteger(0, Minor);
102     else if (Splits.first == "HIP_VERSION_PATCH")
103       VersionPatch = Splits.second.str();
104   }
105   VersionMajorMinor = llvm::VersionTuple(Major, Minor);
106   DetectedVersion =
107       (Twine(Major) + "." + Twine(Minor) + "." + VersionPatch).str();
108 }
109 
110 // For candidate specified by --rocm-path we do not do strict check.
111 SmallVector<RocmInstallationDetector::Candidate, 4>
getInstallationPathCandidates()112 RocmInstallationDetector::getInstallationPathCandidates() {
113   SmallVector<Candidate, 4> Candidates;
114   if (!RocmPathArg.empty()) {
115     Candidates.emplace_back(RocmPathArg.str());
116     return Candidates;
117   }
118 
119   // Try to find relative to the compiler binary.
120   const char *InstallDir = D.getInstalledDir();
121 
122   // Check both a normal Unix prefix position of the clang binary, as well as
123   // the Windows-esque layout the ROCm packages use with the host architecture
124   // subdirectory of bin.
125 
126   // Strip off directory (usually bin)
127   StringRef ParentDir = llvm::sys::path::parent_path(InstallDir);
128   StringRef ParentName = llvm::sys::path::filename(ParentDir);
129 
130   // Some builds use bin/{host arch}, so go up again.
131   if (ParentName == "bin") {
132     ParentDir = llvm::sys::path::parent_path(ParentDir);
133     ParentName = llvm::sys::path::filename(ParentDir);
134   }
135 
136   // Some versions of the rocm llvm package install to /opt/rocm/llvm/bin
137   if (ParentName == "llvm")
138     ParentDir = llvm::sys::path::parent_path(ParentDir);
139 
140   Candidates.emplace_back(ParentDir.str(), /*StrictChecking=*/true);
141 
142   // Device library may be installed in clang resource directory.
143   Candidates.emplace_back(D.ResourceDir, /*StrictChecking=*/true);
144 
145   Candidates.emplace_back(D.SysRoot + "/opt/rocm", /*StrictChecking=*/true);
146   return Candidates;
147 }
148 
RocmInstallationDetector(const Driver & D,const llvm::Triple & HostTriple,const llvm::opt::ArgList & Args,bool DetectHIPRuntime,bool DetectDeviceLib)149 RocmInstallationDetector::RocmInstallationDetector(
150     const Driver &D, const llvm::Triple &HostTriple,
151     const llvm::opt::ArgList &Args, bool DetectHIPRuntime, bool DetectDeviceLib)
152     : D(D) {
153   RocmPathArg = Args.getLastArgValue(clang::driver::options::OPT_rocm_path_EQ);
154   RocmDeviceLibPathArg =
155       Args.getAllArgValues(clang::driver::options::OPT_rocm_device_lib_path_EQ);
156   if (auto *A = Args.getLastArg(clang::driver::options::OPT_hip_version_EQ)) {
157     HIPVersionArg = A->getValue();
158     unsigned Major = 0;
159     unsigned Minor = 0;
160     SmallVector<StringRef, 3> Parts;
161     HIPVersionArg.split(Parts, '.');
162     if (Parts.size())
163       Parts[0].getAsInteger(0, Major);
164     if (Parts.size() > 1)
165       Parts[1].getAsInteger(0, Minor);
166     if (Parts.size() > 2)
167       VersionPatch = Parts[2].str();
168     if (VersionPatch.empty())
169       VersionPatch = "0";
170     if (Major == 0 || Minor == 0)
171       D.Diag(diag::err_drv_invalid_value)
172           << A->getAsString(Args) << HIPVersionArg;
173 
174     VersionMajorMinor = llvm::VersionTuple(Major, Minor);
175     DetectedVersion =
176         (Twine(Major) + "." + Twine(Minor) + "." + VersionPatch).str();
177   } else {
178     VersionPatch = DefaultVersionPatch;
179     VersionMajorMinor =
180         llvm::VersionTuple(DefaultVersionMajor, DefaultVersionMinor);
181     DetectedVersion = (Twine(DefaultVersionMajor) + "." +
182                        Twine(DefaultVersionMinor) + "." + VersionPatch)
183                           .str();
184   }
185 
186   if (DetectHIPRuntime)
187     detectHIPRuntime();
188   if (DetectDeviceLib)
189     detectDeviceLibrary();
190 }
191 
detectDeviceLibrary()192 void RocmInstallationDetector::detectDeviceLibrary() {
193   assert(LibDevicePath.empty());
194 
195   if (!RocmDeviceLibPathArg.empty())
196     LibDevicePath = RocmDeviceLibPathArg[RocmDeviceLibPathArg.size() - 1];
197   else if (const char *LibPathEnv = ::getenv("HIP_DEVICE_LIB_PATH"))
198     LibDevicePath = LibPathEnv;
199 
200   auto &FS = D.getVFS();
201   if (!LibDevicePath.empty()) {
202     // Maintain compatability with HIP flag/envvar pointing directly at the
203     // bitcode library directory. This points directly at the library path instead
204     // of the rocm root installation.
205     if (!FS.exists(LibDevicePath))
206       return;
207 
208     scanLibDevicePath(LibDevicePath);
209     HasDeviceLibrary = allGenericLibsValid() && !LibDeviceMap.empty();
210     return;
211   }
212 
213   // The install path situation in old versions of ROCm is a real mess, and
214   // use a different install layout. Multiple copies of the device libraries
215   // exist for each frontend project, and differ depending on which build
216   // system produced the packages. Standalone OpenCL builds also have a
217   // different directory structure from the ROCm OpenCL package.
218   auto Candidates = getInstallationPathCandidates();
219   for (const auto &Candidate : Candidates) {
220     auto CandidatePath = Candidate.Path;
221 
222     // Check device library exists at the given path.
223     auto CheckDeviceLib = [&](StringRef Path) {
224       bool CheckLibDevice = (!NoBuiltinLibs || Candidate.StrictChecking);
225       if (CheckLibDevice && !FS.exists(Path))
226         return false;
227 
228       scanLibDevicePath(Path);
229 
230       if (!NoBuiltinLibs) {
231         // Check that the required non-target libraries are all available.
232         if (!allGenericLibsValid())
233           return false;
234 
235         // Check that we have found at least one libdevice that we can link in
236         // if -nobuiltinlib hasn't been specified.
237         if (LibDeviceMap.empty())
238           return false;
239       }
240       return true;
241     };
242 
243     // The possible structures are:
244     // - ${ROCM_ROOT}/amdgcn/bitcode/*
245     // - ${ROCM_ROOT}/lib/*
246     // - ${ROCM_ROOT}/lib/bitcode/*
247     // so try to detect these layouts.
248     static constexpr std::array<const char *, 2> SubDirsList[] = {
249         {"amdgcn", "bitcode"},
250         {"lib", ""},
251         {"lib", "bitcode"},
252     };
253 
254     // Make a path by appending sub-directories to InstallPath.
255     auto MakePath = [&](const llvm::ArrayRef<const char *> &SubDirs) {
256       auto Path = CandidatePath;
257       for (auto SubDir : SubDirs)
258         llvm::sys::path::append(Path, SubDir);
259       return Path;
260     };
261 
262     for (auto SubDirs : SubDirsList) {
263       LibDevicePath = MakePath(SubDirs);
264       HasDeviceLibrary = CheckDeviceLib(LibDevicePath);
265       if (HasDeviceLibrary)
266         return;
267     }
268   }
269 }
270 
detectHIPRuntime()271 void RocmInstallationDetector::detectHIPRuntime() {
272   auto Candidates = getInstallationPathCandidates();
273   auto &FS = D.getVFS();
274 
275   for (const auto &Candidate : Candidates) {
276     InstallPath = Candidate.Path;
277     if (InstallPath.empty() || !FS.exists(InstallPath))
278       continue;
279 
280     BinPath = InstallPath;
281     llvm::sys::path::append(BinPath, "bin");
282     IncludePath = InstallPath;
283     llvm::sys::path::append(IncludePath, "include");
284     LibPath = InstallPath;
285     llvm::sys::path::append(LibPath, "lib");
286 
287     llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
288         FS.getBufferForFile(BinPath + "/.hipVersion");
289     if (!VersionFile && Candidate.StrictChecking)
290       continue;
291 
292     if (HIPVersionArg.empty() && VersionFile)
293       ParseHIPVersionFile((*VersionFile)->getBuffer());
294 
295     HasHIPRuntime = true;
296     return;
297   }
298   HasHIPRuntime = false;
299 }
300 
print(raw_ostream & OS) const301 void RocmInstallationDetector::print(raw_ostream &OS) const {
302   if (hasHIPRuntime())
303     OS << "Found HIP installation: " << InstallPath << ", version "
304        << DetectedVersion << '\n';
305 }
306 
AddHIPIncludeArgs(const ArgList & DriverArgs,ArgStringList & CC1Args) const307 void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList &DriverArgs,
308                                                  ArgStringList &CC1Args) const {
309   bool UsesRuntimeWrapper = VersionMajorMinor > llvm::VersionTuple(3, 5);
310 
311   if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
312     // HIP header includes standard library wrapper headers under clang
313     // cuda_wrappers directory. Since these wrapper headers include_next
314     // standard C++ headers, whereas libc++ headers include_next other clang
315     // headers. The include paths have to follow this order:
316     // - wrapper include path
317     // - standard C++ include path
318     // - other clang include path
319     // Since standard C++ and other clang include paths are added in other
320     // places after this function, here we only need to make sure wrapper
321     // include path is added.
322     //
323     // ROCm 3.5 does not fully support the wrapper headers. Therefore it needs
324     // a workaround.
325     SmallString<128> P(D.ResourceDir);
326     if (UsesRuntimeWrapper)
327       llvm::sys::path::append(P, "include", "cuda_wrappers");
328     CC1Args.push_back("-internal-isystem");
329     CC1Args.push_back(DriverArgs.MakeArgString(P));
330   }
331 
332   if (DriverArgs.hasArg(options::OPT_nogpuinc))
333     return;
334 
335   if (!hasHIPRuntime()) {
336     D.Diag(diag::err_drv_no_hip_runtime);
337     return;
338   }
339 
340   CC1Args.push_back("-internal-isystem");
341   CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
342   if (UsesRuntimeWrapper)
343     CC1Args.append({"-include", "__clang_hip_runtime_wrapper.h"});
344 }
345 
ConstructJob(Compilation & C,const JobAction & JA,const InputInfo & Output,const InputInfoList & Inputs,const ArgList & Args,const char * LinkingOutput) const346 void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
347                                   const InputInfo &Output,
348                                   const InputInfoList &Inputs,
349                                   const ArgList &Args,
350                                   const char *LinkingOutput) const {
351 
352   std::string Linker = getToolChain().GetProgramPath(getShortName());
353   ArgStringList CmdArgs;
354   addLinkerCompressDebugSectionsOption(getToolChain(), Args, CmdArgs);
355   AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
356   CmdArgs.push_back("-shared");
357   CmdArgs.push_back("-o");
358   CmdArgs.push_back(Output.getFilename());
359   C.addCommand(std::make_unique<Command>(
360       JA, *this, ResponseFileSupport::AtFileCurCP(), Args.MakeArgString(Linker),
361       CmdArgs, Inputs, Output));
362 }
363 
getAMDGPUTargetFeatures(const Driver & D,const llvm::Triple & Triple,const llvm::opt::ArgList & Args,std::vector<StringRef> & Features)364 void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
365                                      const llvm::Triple &Triple,
366                                      const llvm::opt::ArgList &Args,
367                                      std::vector<StringRef> &Features) {
368   // Add target ID features to -target-feature options. No diagnostics should
369   // be emitted here since invalid target ID is diagnosed at other places.
370   StringRef TargetID = Args.getLastArgValue(options::OPT_mcpu_EQ);
371   if (!TargetID.empty()) {
372     llvm::StringMap<bool> FeatureMap;
373     auto OptionalGpuArch = parseTargetID(Triple, TargetID, &FeatureMap);
374     if (OptionalGpuArch) {
375       StringRef GpuArch = OptionalGpuArch.getValue();
376       // Iterate through all possible target ID features for the given GPU.
377       // If it is mapped to true, add +feature.
378       // If it is mapped to false, add -feature.
379       // If it is not in the map (default), do not add it
380       for (auto &&Feature : getAllPossibleTargetIDFeatures(Triple, GpuArch)) {
381         auto Pos = FeatureMap.find(Feature);
382         if (Pos == FeatureMap.end())
383           continue;
384         Features.push_back(Args.MakeArgStringRef(
385             (Twine(Pos->second ? "+" : "-") + Feature).str()));
386       }
387     }
388   }
389 
390   if (Args.hasFlag(options::OPT_mwavefrontsize64,
391                    options::OPT_mno_wavefrontsize64, false))
392     Features.push_back("+wavefrontsize64");
393 
394   handleTargetFeaturesGroup(
395     Args, Features, options::OPT_m_amdgpu_Features_Group);
396 }
397 
398 /// AMDGPU Toolchain
AMDGPUToolChain(const Driver & D,const llvm::Triple & Triple,const ArgList & Args)399 AMDGPUToolChain::AMDGPUToolChain(const Driver &D, const llvm::Triple &Triple,
400                                  const ArgList &Args)
401     : Generic_ELF(D, Triple, Args),
402       OptionsDefault(
403           {{options::OPT_O, "3"}, {options::OPT_cl_std_EQ, "CL1.2"}}) {
404   // Check code object version options. Emit warnings for legacy options
405   // and errors for the last invalid code object version options.
406   // It is done here to avoid repeated warning or error messages for
407   // each tool invocation.
408   (void)getOrCheckAMDGPUCodeObjectVersion(D, Args, /*Diagnose=*/true);
409 }
410 
buildLinker() const411 Tool *AMDGPUToolChain::buildLinker() const {
412   return new tools::amdgpu::Linker(*this);
413 }
414 
415 DerivedArgList *
TranslateArgs(const DerivedArgList & Args,StringRef BoundArch,Action::OffloadKind DeviceOffloadKind) const416 AMDGPUToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch,
417                                Action::OffloadKind DeviceOffloadKind) const {
418 
419   DerivedArgList *DAL =
420       Generic_ELF::TranslateArgs(Args, BoundArch, DeviceOffloadKind);
421 
422   const OptTable &Opts = getDriver().getOpts();
423 
424   if (!DAL)
425     DAL = new DerivedArgList(Args.getBaseArgs());
426 
427   for (Arg *A : Args) {
428     if (!shouldSkipArgument(A))
429       DAL->append(A);
430   }
431 
432   checkTargetID(*DAL);
433 
434   if (!Args.getLastArgValue(options::OPT_x).equals("cl"))
435     return DAL;
436 
437   // Phase 1 (.cl -> .bc)
438   if (Args.hasArg(options::OPT_c) && Args.hasArg(options::OPT_emit_llvm)) {
439     DAL->AddFlagArg(nullptr, Opts.getOption(getTriple().isArch64Bit()
440                                                 ? options::OPT_m64
441                                                 : options::OPT_m32));
442 
443     // Have to check OPT_O4, OPT_O0 & OPT_Ofast separately
444     // as they defined that way in Options.td
445     if (!Args.hasArg(options::OPT_O, options::OPT_O0, options::OPT_O4,
446                      options::OPT_Ofast))
447       DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_O),
448                         getOptionDefault(options::OPT_O));
449   }
450 
451   return DAL;
452 }
453 
getDefaultDenormsAreZeroForTarget(llvm::AMDGPU::GPUKind Kind)454 bool AMDGPUToolChain::getDefaultDenormsAreZeroForTarget(
455     llvm::AMDGPU::GPUKind Kind) {
456 
457   // Assume nothing without a specific target.
458   if (Kind == llvm::AMDGPU::GK_NONE)
459     return false;
460 
461   const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
462 
463   // Default to enabling f32 denormals by default on subtargets where fma is
464   // fast with denormals
465   const bool BothDenormAndFMAFast =
466       (ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) &&
467       (ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32);
468   return !BothDenormAndFMAFast;
469 }
470 
/// Return the default denormal mode for floating-point type \p FPType.
///
/// Anything other than IEEE single (including a null \p FPType) gets the IEEE
/// mode. For IEEE single the answer depends on the offload kind of \p JA, the
/// relevant flush/denormal options in \p DriverArgs, and the target default.
llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType(
    const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
    const llvm::fltSemantics *FPType) const {
  // Denormals are always enabled for f16 and f64.
  const bool IsF32 = FPType && FPType == &llvm::APFloat::IEEEsingle();
  if (!IsF32)
    return llvm::DenormalMode::getIEEE();

  const auto OffloadKind = JA.getOffloadingDeviceKind();
  if (OffloadKind == Action::OFK_HIP || OffloadKind == Action::OFK_Cuda) {
    auto Arch = getProcessorFromTargetID(getTriple(), JA.getOffloadingArch());
    auto Kind = llvm::AMDGPU::parseArchAMDGCN(Arch);
    const bool Flush =
        DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
                           options::OPT_fno_cuda_flush_denormals_to_zero,
                           getDefaultDenormsAreZeroForTarget(Kind));
    return Flush ? llvm::DenormalMode::getPreserveSign()
                 : llvm::DenormalMode::getIEEE();
  }

  const StringRef GpuArch = getGPUArch(DriverArgs);
  auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);

  // TODO: There are way too many flags that change this. Do we need to check
  // them all?
  const bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
                   getDefaultDenormsAreZeroForTarget(Kind);
  if (!DAZ)
    return llvm::DenormalMode::getIEEE();

  // Outputs are flushed to zero (FTZ) with the sign preserved, and denormal
  // inputs are implicitly treated as zero (DAZ).
  return llvm::DenormalMode::getPreserveSign();
}
504 
isWave64(const llvm::opt::ArgList & DriverArgs,llvm::AMDGPU::GPUKind Kind)505 bool AMDGPUToolChain::isWave64(const llvm::opt::ArgList &DriverArgs,
506                                llvm::AMDGPU::GPUKind Kind) {
507   const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
508   bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32);
509 
510   return !HasWave32 || DriverArgs.hasFlag(
511     options::OPT_mwavefrontsize64, options::OPT_mno_wavefrontsize64, false);
512 }
513 
514 
515 /// ROCM Toolchain
ROCMToolChain(const Driver & D,const llvm::Triple & Triple,const ArgList & Args)516 ROCMToolChain::ROCMToolChain(const Driver &D, const llvm::Triple &Triple,
517                              const ArgList &Args)
518     : AMDGPUToolChain(D, Triple, Args) {
519   RocmInstallation.detectDeviceLibrary();
520 }
521 
addClangTargetOptions(const llvm::opt::ArgList & DriverArgs,llvm::opt::ArgStringList & CC1Args,Action::OffloadKind DeviceOffloadingKind) const522 void AMDGPUToolChain::addClangTargetOptions(
523     const llvm::opt::ArgList &DriverArgs,
524     llvm::opt::ArgStringList &CC1Args,
525     Action::OffloadKind DeviceOffloadingKind) const {
526   // Default to "hidden" visibility, as object level linking will not be
527   // supported for the foreseeable future.
528   if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ,
529                          options::OPT_fvisibility_ms_compat)) {
530     CC1Args.push_back("-fvisibility");
531     CC1Args.push_back("hidden");
532     CC1Args.push_back("-fapply-global-visibility-to-externs");
533   }
534 }
535 
536 StringRef
getGPUArch(const llvm::opt::ArgList & DriverArgs) const537 AMDGPUToolChain::getGPUArch(const llvm::opt::ArgList &DriverArgs) const {
538   return getProcessorFromTargetID(
539       getTriple(), DriverArgs.getLastArgValue(options::OPT_mcpu_EQ));
540 }
541 
checkTargetID(const llvm::opt::ArgList & DriverArgs) const542 void AMDGPUToolChain::checkTargetID(
543     const llvm::opt::ArgList &DriverArgs) const {
544   StringRef TargetID = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
545   if (TargetID.empty())
546     return;
547 
548   llvm::StringMap<bool> FeatureMap;
549   auto OptionalGpuArch = parseTargetID(getTriple(), TargetID, &FeatureMap);
550   if (!OptionalGpuArch) {
551     getDriver().Diag(clang::diag::err_drv_bad_target_id) << TargetID;
552   }
553 }
554 
addClangTargetOptions(const llvm::opt::ArgList & DriverArgs,llvm::opt::ArgStringList & CC1Args,Action::OffloadKind DeviceOffloadingKind) const555 void ROCMToolChain::addClangTargetOptions(
556     const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
557     Action::OffloadKind DeviceOffloadingKind) const {
558   AMDGPUToolChain::addClangTargetOptions(DriverArgs, CC1Args,
559                                          DeviceOffloadingKind);
560 
561   // For the OpenCL case where there is no offload target, accept -nostdlib to
562   // disable bitcode linking.
563   if (DeviceOffloadingKind == Action::OFK_None &&
564       DriverArgs.hasArg(options::OPT_nostdlib))
565     return;
566 
567   if (DriverArgs.hasArg(options::OPT_nogpulib))
568     return;
569 
570   if (!RocmInstallation.hasDeviceLibrary()) {
571     getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 0;
572     return;
573   }
574 
575   // Get the device name and canonicalize it
576   const StringRef GpuArch = getGPUArch(DriverArgs);
577   auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
578   const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
579   std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
580   if (LibDeviceFile.empty()) {
581     getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 1 << GpuArch;
582     return;
583   }
584 
585   bool Wave64 = isWave64(DriverArgs, Kind);
586 
587   // TODO: There are way too many flags that change this. Do we need to check
588   // them all?
589   bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
590              getDefaultDenormsAreZeroForTarget(Kind);
591   bool FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only);
592 
593   bool UnsafeMathOpt =
594       DriverArgs.hasArg(options::OPT_cl_unsafe_math_optimizations);
595   bool FastRelaxedMath = DriverArgs.hasArg(options::OPT_cl_fast_relaxed_math);
596   bool CorrectSqrt =
597       DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt);
598 
599   // Add the OpenCL specific bitcode library.
600   CC1Args.push_back("-mlink-builtin-bitcode");
601   CC1Args.push_back(DriverArgs.MakeArgString(RocmInstallation.getOpenCLPath()));
602 
603   // Add the generic set of libraries.
604   RocmInstallation.addCommonBitcodeLibCC1Args(
605       DriverArgs, CC1Args, LibDeviceFile, Wave64, DAZ, FiniteOnly,
606       UnsafeMathOpt, FastRelaxedMath, CorrectSqrt);
607 }
608 
addCommonBitcodeLibCC1Args(const llvm::opt::ArgList & DriverArgs,llvm::opt::ArgStringList & CC1Args,StringRef LibDeviceFile,bool Wave64,bool DAZ,bool FiniteOnly,bool UnsafeMathOpt,bool FastRelaxedMath,bool CorrectSqrt) const609 void RocmInstallationDetector::addCommonBitcodeLibCC1Args(
610     const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
611     StringRef LibDeviceFile, bool Wave64, bool DAZ, bool FiniteOnly,
612     bool UnsafeMathOpt, bool FastRelaxedMath, bool CorrectSqrt) const {
613   static const char LinkBitcodeFlag[] = "-mlink-builtin-bitcode";
614 
615   CC1Args.push_back(LinkBitcodeFlag);
616   CC1Args.push_back(DriverArgs.MakeArgString(getOCMLPath()));
617 
618   CC1Args.push_back(LinkBitcodeFlag);
619   CC1Args.push_back(DriverArgs.MakeArgString(getOCKLPath()));
620 
621   CC1Args.push_back(LinkBitcodeFlag);
622   CC1Args.push_back(DriverArgs.MakeArgString(getDenormalsAreZeroPath(DAZ)));
623 
624   CC1Args.push_back(LinkBitcodeFlag);
625   CC1Args.push_back(DriverArgs.MakeArgString(
626       getUnsafeMathPath(UnsafeMathOpt || FastRelaxedMath)));
627 
628   CC1Args.push_back(LinkBitcodeFlag);
629   CC1Args.push_back(DriverArgs.MakeArgString(
630       getFiniteOnlyPath(FiniteOnly || FastRelaxedMath)));
631 
632   CC1Args.push_back(LinkBitcodeFlag);
633   CC1Args.push_back(
634       DriverArgs.MakeArgString(getCorrectlyRoundedSqrtPath(CorrectSqrt)));
635 
636   CC1Args.push_back(LinkBitcodeFlag);
637   CC1Args.push_back(DriverArgs.MakeArgString(getWavefrontSize64Path(Wave64)));
638 
639   CC1Args.push_back(LinkBitcodeFlag);
640   CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
641 }
642 
shouldSkipArgument(const llvm::opt::Arg * A) const643 bool AMDGPUToolChain::shouldSkipArgument(const llvm::opt::Arg *A) const {
644   Option O = A->getOption();
645   if (O.matches(options::OPT_fPIE) || O.matches(options::OPT_fpie))
646     return true;
647   return false;
648 }
649