;; bash$ cat repro.cl
;; void device_kernel(__local float* ptr0, __local float* ptr1) {
;;   *ptr0 = 0;
;;   *ptr1 = 1;
;; }
;;
;; __kernel void host_kernel(uint size) {
;;   void(^block)(__local void*, __local void*) = ^(__local void* ptr0, __local void* ptr1){
;;     device_kernel(ptr0, ptr1);
;;   };
;;
;;   uint wgSize = get_kernel_work_group_size(block);
;;   uint prefMul = get_kernel_preferred_work_group_size_multiple(block);
;;   enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(1),
;;                  0, NULL, NULL, block, size, wgSize * prefMul);
;; }
;; bash$
;; bash$ export PATH_TO_INCLUDE=$PATH_TO_GEN/lib/clang/3.6.1/include
;; bash$ $PATH_TO_GEN/bin/clang -cc1 -x cl -cl-std=CL2.0 -triple spir64-unknown-unknown -emit-llvm -include opencl-20.h repro.cl -o device_execution.ll

;; 1. Check mangling of device execution built-ins for blocks with local memory arguments
;; 2. Check there is an enqueue_kernel with ellipsis

;; The reverse translation below writes to its own file (%t.rev.bc) instead of
;; overwriting the input bitcode, so the FileCheck patterns can only be
;; satisfied by the module that actually went through the SPIR-V round trip.

; RUN: llvm-as %s -o %t.bc
; RUN: llvm-spirv %t.bc -o %t.spv
; RUN: llvm-spirv -r %t.spv -o %t.rev.bc
; RUN: llvm-dis < %t.rev.bc | FileCheck %s

; ModuleID = 'repro.cl'
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
target triple = "spir64-unknown-unknown"

%opencl.block = type opaque
%struct.ndrange_t = type { i32, [3 x i64], [3 x i64], [3 x i64] }
%opencl.queue_t = type opaque
%opencl.clk_event_t = type opaque

; device_kernel: writes 0.0 through %ptr0 and 1.0 through %ptr1. Both pointers
; are in addrspace(3). The arguments are spilled to allocas and reloaded before
; use (unoptimized codegen of the OpenCL function shown in the header).
; Function Attrs: nounwind
define spir_func void @device_kernel(float addrspace(3)* %ptr0, float addrspace(3)* %ptr1) #0 {
entry:
  %ptr0.addr = alloca float addrspace(3)*, align 8
  %ptr1.addr = alloca float addrspace(3)*, align 8
  store float addrspace(3)* %ptr0, float addrspace(3)** %ptr0.addr, align 8
  store float addrspace(3)* %ptr1, float addrspace(3)** %ptr1.addr, align 8
  %0 = load float addrspace(3)*, float addrspace(3)** %ptr0.addr, align 8
  store float 0.000000e+00, float addrspace(3)* %0, align 4
  %1 = load float addrspace(3)*, float addrspace(3)** %ptr1.addr, align 8
  store float 1.000000e+00, float addrspace(3)* %1, align 4
  ret void
}

; host_kernel: binds @__host_kernel_block_invoke into a block object via
; @spir_block_bind, queries the two work-group-size built-ins with that block,
; and then enqueues the block through the variadic enqueue_kernel overload.
; The two trailing variadic-position values passed are %size and
; wgSize * prefMul, matching the source in the header.
; Function Attrs: nounwind
define spir_kernel void @host_kernel(i32 %size) #0 {
entry:
  %size.addr = alloca i32, align 4
  %block = alloca %opencl.block*, align 8
  %wgSize = alloca i32, align 4
  %prefMul = alloca i32, align 4
  %agg.tmp = alloca %struct.ndrange_t, align 8
  store i32 %size, i32* %size.addr, align 4
  %0 = call %opencl.block* @spir_block_bind(i8* bitcast (void (i8*, i8 addrspace(3)*, i8 addrspace(3)*)* @__host_kernel_block_invoke to i8*), i32 0, i32 0, i8* null)
  store %opencl.block* %0, %opencl.block** %block, align 8
  %1 = load %opencl.block*, %opencl.block** %block, align 8
; The mangled name must survive the round trip unchanged, including the
; "PU3AS3vz" (addrspace(3) void pointer + ellipsis) block signature.
; CHECK: call {{.*}} @_Z26get_kernel_work_group_sizeU13block_pointerFvPU3AS3vzE
  %call = call spir_func i32 @_Z26get_kernel_work_group_sizeU13block_pointerFvPU3AS3vzE(%opencl.block* %1)
  store i32 %call, i32* %wgSize, align 4
  %2 = load %opencl.block*, %opencl.block** %block, align 8
; CHECK: call {{.*}} @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvPU3AS3vzE
  %call1 = call spir_func i32 @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvPU3AS3vzE(%opencl.block* %2)
  store i32 %call1, i32* %prefMul, align 4
  %call2 = call spir_func %opencl.queue_t* @_Z17get_default_queuev()
  ; Build the 1-D ndrange (global size 1) into %agg.tmp through the sret slot.
  call spir_func void @_Z10ndrange_1Dm(%struct.ndrange_t* sret %agg.tmp, i64 1)
  %3 = load %opencl.block*, %opencl.block** %block, align 8
  %4 = load i32, i32* %size.addr, align 4
  %5 = load i32, i32* %wgSize, align 4
  %6 = load i32, i32* %prefMul, align 4
  %mul = mul i32 %5, %6
; The call must keep the block operand followed by the two i32 values that
; occupy the fixed i32 slot and the variadic tail.
; CHECK: call {{.*}} @_Z14enqueue_kernel{{.*}}U13block_pointerFvPU3AS3vzEjz({{.*}}, %opencl.block* {{.*}}, i32 {{.*}}, i32 {{.*}})
  %call3 = call spir_func i32 (%opencl.queue_t*, i32, %struct.ndrange_t*, i32, %opencl.clk_event_t**, %opencl.clk_event_t**, %opencl.block*, i32, ...)* @_Z14enqueue_kernel9ocl_queuei9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvPU3AS3vzEjz(%opencl.queue_t* %call2, i32 241, %struct.ndrange_t* byval %agg.tmp, i32 0, %opencl.clk_event_t** null, %opencl.clk_event_t** null, %opencl.block* %3, i32 %4, i32 %mul)
  ret void
}

; __host_kernel_block_invoke: body of the block literal. It receives the block
; descriptor plus two i8 addrspace(3) pointers, casts the pointers to
; float addrspace(3)* and forwards them to @device_kernel. The descriptor is
; only stored/bitcast here; nothing is read out of it.
; Function Attrs: nounwind
; CHECK-LABEL: define {{.*}} @__host_kernel_block_invoke
define internal spir_func void @__host_kernel_block_invoke(i8* %.block_descriptor, i8 addrspace(3)* %ptr0, i8 addrspace(3)* %ptr1) #0 {
entry:
  %.block_descriptor.addr = alloca i8*, align 8
  %ptr0.addr = alloca i8 addrspace(3)*, align 8
  %ptr1.addr = alloca i8 addrspace(3)*, align 8
  %block.addr = alloca <{}>*, align 8
  store i8* %.block_descriptor, i8** %.block_descriptor.addr, align 8
  ; %0 is never used below; it is kept so the numbering of %1..%4 stays valid.
  %0 = load i8*, i8** %.block_descriptor.addr
  store i8 addrspace(3)* %ptr0, i8 addrspace(3)** %ptr0.addr, align 8
  store i8 addrspace(3)* %ptr1, i8 addrspace(3)** %ptr1.addr, align 8
  %block = bitcast i8* %.block_descriptor to <{}>*
  store <{}>* %block, <{}>** %block.addr, align 8
  %1 = load i8 addrspace(3)*, i8 addrspace(3)** %ptr0.addr, align 8
  %2 = bitcast i8 addrspace(3)* %1 to float addrspace(3)*
  %3 = load i8 addrspace(3)*, i8 addrspace(3)** %ptr1.addr, align 8
  %4 = bitcast i8 addrspace(3)* %3 to float addrspace(3)*
  call spir_func void @device_kernel(float addrspace(3)* %2, float addrspace(3)* %4)
  ret void
}

declare %opencl.block* @spir_block_bind(i8*, i32, i32, i8*)

declare spir_func i32 @_Z26get_kernel_work_group_sizeU13block_pointerFvPU3AS3vzE(%opencl.block*) #1

declare spir_func i32 @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvPU3AS3vzE(%opencl.block*) #1

; CHECK: declare {{.*}} @_Z14enqueue_kernel{{.*}}U13block_pointerFvPU3AS3vzEjz({{.*}}, %opencl.block*, i32, ...)
; Variadic enqueue_kernel overload: queue, flags, ndrange (byval), event count,
; wait list, return event, block, one fixed i32, then the ellipsis.
declare spir_func i32 @_Z14enqueue_kernel9ocl_queuei9ndrange_tjPK12ocl_clkeventP12ocl_clkeventU13block_pointerFvPU3AS3vzEjz(
    %opencl.queue_t*,
    i32,
    %struct.ndrange_t* byval,
    i32,
    %opencl.clk_event_t**,
    %opencl.clk_event_t**,
    %opencl.block*,
    i32,
    ...) #1

; Takes no arguments and returns a queue handle.
declare spir_func %opencl.queue_t* @_Z17get_default_queuev() #1

; Fills the ndrange_t passed through the sret pointer from one i64 operand.
declare spir_func void @_Z10ndrange_1Dm(%struct.ndrange_t* sret, i64) #1

; Attribute group used by the function definitions in this module.
attributes #0 = {
  nounwind
  "less-precise-fpmad"="false"
  "no-frame-pointer-elim"="false"
  "no-infs-fp-math"="false"
  "no-nans-fp-math"="false"
  "no-realign-stack"
  "stack-protector-buffer-size"="8"
  "unsafe-fp-math"="false"
  "use-soft-float"="false"
}

; Attribute group used by the built-in declarations; identical to #0 except
; that it carries no nounwind.
attributes #1 = {
  "less-precise-fpmad"="false"
  "no-frame-pointer-elim"="false"
  "no-infs-fp-math"="false"
  "no-nans-fp-math"="false"
  "no-realign-stack"
  "stack-protector-buffer-size"="8"
  "unsafe-fp-math"="false"
  "use-soft-float"="false"
}

; Named metadata: kernel list, SPIR version (!6), OpenCL version (!7), and
; several lists that all point at the empty node !8.
!opencl.kernels = !{!0}
!opencl.enable.FP_CONTRACT = !{}
!opencl.spir.version = !{!6}
!opencl.ocl.version = !{!7}
!opencl.used.extensions = !{!8}
!opencl.used.optional.core.features = !{!8}
!opencl.compiler.options = !{!8}

; !0 ties @host_kernel to the per-argument descriptions !1..!5 of its single
; "uint" parameter.
!0 = !{void (i32)* @host_kernel, !1, !2, !3, !4, !5}
!1 = !{!"kernel_arg_addr_space", i32 0}
!2 = !{!"kernel_arg_access_qual", !"none"}
!3 = !{!"kernel_arg_type", !"uint"}
!4 = !{!"kernel_arg_base_type", !"uint"}
!5 = !{!"kernel_arg_type_qual", !""}
; Version pairs: !6 = 1.2 (referenced by opencl.spir.version),
; !7 = 2.0 (referenced by opencl.ocl.version).
!6 = !{i32 1, i32 2}
!7 = !{i32 2, i32 0}
!8 = !{}