• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===----- CGCUDANV.cpp - Interface to NVIDIA CUDA Runtime ----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for CUDA code generation targeting the NVIDIA CUDA
11 // runtime library.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "CGCUDARuntime.h"
16 #include "CodeGenFunction.h"
17 #include "CodeGenModule.h"
18 #include "clang/AST/Decl.h"
19 #include "llvm/BasicBlock.h"
20 #include "llvm/Constants.h"
21 #include "llvm/DerivedTypes.h"
22 #include "llvm/Support/CallSite.h"
23 
24 #include <vector>
25 
26 using namespace clang;
27 using namespace CodeGen;
28 
29 namespace {
30 
31 class CGNVCUDARuntime : public CGCUDARuntime {
32 
33 private:
34   llvm::Type *IntTy, *SizeTy;
35   llvm::PointerType *CharPtrTy, *VoidPtrTy;
36 
37   llvm::Constant *getSetupArgumentFn() const;
38   llvm::Constant *getLaunchFn() const;
39 
40 public:
41   CGNVCUDARuntime(CodeGenModule &CGM);
42 
43   void EmitDeviceStubBody(CodeGenFunction &CGF, FunctionArgList &Args);
44 };
45 
46 }
47 
CGNVCUDARuntime(CodeGenModule & CGM)48 CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM) : CGCUDARuntime(CGM) {
49   CodeGen::CodeGenTypes &Types = CGM.getTypes();
50   ASTContext &Ctx = CGM.getContext();
51 
52   IntTy = Types.ConvertType(Ctx.IntTy);
53   SizeTy = Types.ConvertType(Ctx.getSizeType());
54 
55   CharPtrTy = llvm::PointerType::getUnqual(Types.ConvertType(Ctx.CharTy));
56   VoidPtrTy = cast<llvm::PointerType>(Types.ConvertType(Ctx.VoidPtrTy));
57 }
58 
getSetupArgumentFn() const59 llvm::Constant *CGNVCUDARuntime::getSetupArgumentFn() const {
60   // cudaError_t cudaSetupArgument(void *, size_t, size_t)
61   std::vector<llvm::Type*> Params;
62   Params.push_back(VoidPtrTy);
63   Params.push_back(SizeTy);
64   Params.push_back(SizeTy);
65   return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy,
66                                                            Params, false),
67                                    "cudaSetupArgument");
68 }
69 
getLaunchFn() const70 llvm::Constant *CGNVCUDARuntime::getLaunchFn() const {
71   // cudaError_t cudaLaunch(char *)
72   std::vector<llvm::Type*> Params;
73   Params.push_back(CharPtrTy);
74   return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy,
75                                                            Params, false),
76                                    "cudaLaunch");
77 }
78 
EmitDeviceStubBody(CodeGenFunction & CGF,FunctionArgList & Args)79 void CGNVCUDARuntime::EmitDeviceStubBody(CodeGenFunction &CGF,
80                                          FunctionArgList &Args) {
81   // Build the argument value list and the argument stack struct type.
82   llvm::SmallVector<llvm::Value *, 16> ArgValues;
83   std::vector<llvm::Type *> ArgTypes;
84   for (FunctionArgList::const_iterator I = Args.begin(), E = Args.end();
85        I != E; ++I) {
86     llvm::Value *V = CGF.GetAddrOfLocalVar(*I);
87     ArgValues.push_back(V);
88     assert(isa<llvm::PointerType>(V->getType()) && "Arg type not PointerType");
89     ArgTypes.push_back(cast<llvm::PointerType>(V->getType())->getElementType());
90   }
91   llvm::StructType *ArgStackTy = llvm::StructType::get(
92       CGF.getLLVMContext(), ArgTypes);
93 
94   llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");
95 
96   // Emit the calls to cudaSetupArgument
97   llvm::Constant *cudaSetupArgFn = getSetupArgumentFn();
98   for (unsigned I = 0, E = Args.size(); I != E; ++I) {
99     llvm::Value *Args[3];
100     llvm::BasicBlock *NextBlock = CGF.createBasicBlock("setup.next");
101     Args[0] = CGF.Builder.CreatePointerCast(ArgValues[I], VoidPtrTy);
102     Args[1] = CGF.Builder.CreateIntCast(
103         llvm::ConstantExpr::getSizeOf(ArgTypes[I]),
104         SizeTy, false);
105     Args[2] = CGF.Builder.CreateIntCast(
106         llvm::ConstantExpr::getOffsetOf(ArgStackTy, I),
107         SizeTy, false);
108     llvm::CallSite CS = CGF.EmitCallOrInvoke(cudaSetupArgFn, Args);
109     llvm::Constant *Zero = llvm::ConstantInt::get(IntTy, 0);
110     llvm::Value *CSZero = CGF.Builder.CreateICmpEQ(CS.getInstruction(), Zero);
111     CGF.Builder.CreateCondBr(CSZero, NextBlock, EndBlock);
112     CGF.EmitBlock(NextBlock);
113   }
114 
115   // Emit the call to cudaLaunch
116   llvm::Constant *cudaLaunchFn = getLaunchFn();
117   llvm::Value *Arg = CGF.Builder.CreatePointerCast(CGF.CurFn, CharPtrTy);
118   CGF.EmitCallOrInvoke(cudaLaunchFn, Arg);
119   CGF.EmitBranch(EndBlock);
120 
121   CGF.EmitBlock(EndBlock);
122 }
123 
CreateNVCUDARuntime(CodeGenModule & CGM)124 CGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) {
125   return new CGNVCUDARuntime(CGM);
126 }
127