• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
; Simple bit of IR to mimic CUDA's libdevice. We want to be able to link
; with it, and we need to make sure that all __nvvm_reflect calls have been
; eliminated by the time PTX is produced.
4
; Target triple this module is written for.
target triple = "nvptx-unknown-cuda"

; Externally defined hook: takes an i8* and returns an i32. Only the
; declaration lives in this module.
declare i32 @__nvvm_reflect(i8*)

; Eight-byte, NUL-terminated "USE_MUL" string placed in address space 1.
; Its address is what @_Z17device_mul_or_addff hands to @__nvvm_reflect.
@"$str" = private addrspace(1) constant [8 x i8] c"USE_MUL\00"
10
; No-op helper: takes a single float, ignores it, and returns at once.
; No call to this function appears in the visible part of the module.
define void @unused_subfunc(float %a) {
entry:
  ret void
}
14
; No-op helper: takes a single float, ignores it, and returns at once.
; It is called from @_Z17device_mul_or_addff with that function's result.
define void @used_subfunc(float %a) {
entry:
  ret void
}
18
; Returns either %a * %b or %a + %b, chosen by the value @__nvvm_reflect
; yields for the "USE_MUL" string:
;   non-zero -> multiply
;   zero     -> add
; The chosen value is also passed to @used_subfunc before being returned.
define float @_Z17device_mul_or_addff(float %a, float %b) {
entry:
  ; The string lives in addrspace(1); cast its first-byte address to a plain
  ; i8* because that is the parameter type @__nvvm_reflect is declared with.
  %use_mul_flag = call i32 @__nvvm_reflect(i8* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(1)* @"$str", i32 0, i32 0) to i8*))
  %want_mul = icmp ne i32 %use_mul_flag, 0
  br i1 %want_mul, label %mul_path, label %add_path

mul_path:                                         ; flag was non-zero
  %product = fmul float %a, %b
  br label %join

add_path:                                         ; flag was zero
  %sum = fadd float %a, %b
  br label %join

join:
  ; Merge whichever arithmetic result was computed on the way here.
  %result = phi float [ %product, %mul_path ], [ %sum, %add_path ]

  call void @used_subfunc(float %result)

  ret float %result
}
39