;; Test that the reader correctly mangles the get_kernel_work_group_size,
;; get_kernel_preferred_work_group_size_multiple, and enqueue_kernel built-ins and
;; produces spir_block_bind for both blocks, one of which has no captured context.
;; Note that, for the moment, spir_block_bind is emitted once per built-in call
;; that uses the block. This is not against the SPIR 2.0 specification, so it is
;; done this way to simplify the reader implementation.
;;
;; See below how this LLVM IR has been obtained:
;; bash$
;; bash$ cat device_execution_multiple_blocks.cl
;; void block_fn(int arg, __global int* res)
;; {
;;   *res = arg;
;; }
;;
;; __global int glbRes = 0;
;; void (^kernelBlockNoCtx)(void) = ^{ block_fn(1, &glbRes); };
;;
;; kernel void enqueue_block_get_kernel_preferred_work_group_size_multiple(__global int* res)
;; {
;;
;;
;;   void (^kernelBlock)(void) = ^{ block_fn(2, res); };
;;   uint globalSize = get_kernel_work_group_size(kernelBlock);
;;   uint multiple = get_kernel_preferred_work_group_size_multiple(kernelBlock);
;;   uint localSize = globalSize / multiple;
;;
;;   queue_t q1 = get_default_queue();
;;   ndrange_t ndrange = ndrange_1D(localSize, globalSize);
;;   enqueue_kernel(q1, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);
;;   // Enqueue kernel without captured context
;;   enqueue_kernel(q1, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlockNoCtx);
;; }
;; bash$
;; bash$ export PATH_TO_GEN=path_to_spir20_generator_install_dir
;; bash$ $PATH_TO_GEN/bin/clang -cc1 -x cl -O2 -cl-std=CL2.0 -triple spir64-unknown-unknown\
;;       -emit-spirv -include $PATH_TO_GEN/lib/clang/3.6.1/include/opencl-20.h\
;;       device_execution_multiple_blocks.cl -o device_execution_multiple_blocks.ll

; RUN: llvm-as %s -o %t.bc
; RUN: llvm-spirv %t.bc -o %t.spv
; RUN: llvm-spirv -r %t.spv -o %t.bc
; RUN: llvm-dis < %t.bc | FileCheck %s

; ModuleID = 'device_execution_multiple_blocks.cl'
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
target triple = "spir64-unknown-unknown"

%struct.ndrange_t = type { i32, [3 x i64], [3 x i64], [3 x i64] }
%opencl.block = type opaque
%opencl.queue_t = type opaque

@glbRes = addrspace(1) global i32 0, align 4

; Helper called from both blocks in the OpenCL source: stores %arg through %res.
; Function Attrs: nounwind
define spir_func void @block_fn(i32 %arg, i32 addrspace(1)* nocapture %res) #0 {
entry:
  store i32 %arg, i32 addrspace(1)* %res, align 4
  ret void
}

; Invoke function of the block without captured context: writes 1 to @glbRes
; and never reads its %.block_descriptor argument.
; Function Attrs: nounwind
define internal spir_func void @kernelBlockNoCtx_block_invoke(i8* nocapture readnone %.block_descriptor) #0 {
entry:
  store i32 1, i32 addrspace(1)* @glbRes, align 4
  ret void
}

; Kernel under test. The input IR binds the capturing block once (%1) and reuses
; it for three built-in calls; the FileCheck patterns below expect the translated
; module to contain a separate spir_block_bind in front of every built-in call.
; NOTE(review): the FileCheck variables are captured with `.*` at the start of
; the pattern, so they presumably include the leading whitespace of the matched
; line; that appears to be why their uses are written without a space after `*`.
; Function Attrs: nounwind
define spir_kernel void @enqueue_block_get_kernel_preferred_work_group_size_multiple(i32 addrspace(1)* %res) #0 {
entry:
  %captured = alloca <{ i32 addrspace(1)* }>, align 8
  %ndrange = alloca %struct.ndrange_t, align 8
  %block.captured = getelementptr inbounds <{ i32 addrspace(1)* }>, <{ i32 addrspace(1)* }>* %captured, i64 0, i32 0
  store i32 addrspace(1)* %res, i32 addrspace(1)** %block.captured, align 8
  %0 = bitcast <{ i32 addrspace(1)* }>* %captured to i8*
; CHECK: [[CTX:.*]] = bitcast %0* %captured to i8*
  %1 = call %opencl.block* @spir_block_bind(i8* bitcast (void (i8*)* @__enqueue_block_get_kernel_preferred_work_group_size_multiple_block_invoke to i8*), i32 8, i32 8, i8* %0) #2
; CHECK: [[BLOCK0:.*]] = call {{.*}} @spir_block_bind({{.*}}@__enqueue_block_get_kernel_preferred_work_group_size_multiple_block_invoke{{.*}}, i32 8, i32 8, i8*[[CTX]])
; CHECK: call {{.*}} @_Z26get_kernel_work_group_sizeU13block_pointerFvvE(%opencl.block*[[BLOCK0]])
  %call = call spir_func i32 @_Z26get_kernel_work_group_sizeU13block_pointerFvvE(%opencl.block* %1) #2
; CHECK: [[BLOCK1:.*]] = call {{.*}} @spir_block_bind({{.*}}@__enqueue_block_get_kernel_preferred_work_group_size_multiple_block_invoke{{.*}}, i32 8, i32 8, i8*[[CTX]])
; CHECK: call {{.*}} @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvvE(%opencl.block*[[BLOCK1]])
  %call1 = call spir_func i32 @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvvE(%opencl.block* %1) #2
  %div = udiv i32 %call, %call1
  %call2 = call spir_func %opencl.queue_t* @get_default_queue() #2
  %conv = zext i32 %div to i64
  %conv3 = zext i32 %call to i64
  call spir_func void @_Z10ndrange_1Dmm(%struct.ndrange_t* sret %ndrange, i64 %conv, i64 %conv3) #2
; CHECK: [[BLOCK2:.*]] = call {{.*}} @spir_block_bind({{.*}}@__enqueue_block_get_kernel_preferred_work_group_size_multiple_block_invoke{{.*}}, i32 8, i32 8, i8*[[CTX]])
; CHECK: call {{.*}} @_Z14enqueue_kernel{{.*}}, %opencl.block*[[BLOCK2]])
  %call4 = call spir_func i32 @_Z14enqueue_kernel9ocl_queuei9ndrange_tU13block_pointerFvvE(%opencl.queue_t* %call2, i32 241, %struct.ndrange_t* byval %ndrange, %opencl.block* %1) #2
; CHECK: [[BLOCK3:.*]] = call {{.*}} @spir_block_bind({{.*}}@kernelBlockNoCtx_block_invoke{{.*}}, i32 0, i32 0, i8* null)
; CHECK: call {{.*}} @_Z14enqueue_kernel{{.*}}, %opencl.block*[[BLOCK3]])
  %2 = call %opencl.block* @spir_block_bind(i8* bitcast (void (i8*)* @kernelBlockNoCtx_block_invoke to i8*), i32 0, i32 0, i8* null) #2
  %call5 = call spir_func i32 @_Z14enqueue_kernel9ocl_queuei9ndrange_tU13block_pointerFvvE(%opencl.queue_t* %call2, i32 241, %struct.ndrange_t* byval %ndrange, %opencl.block* %2) #2
  ret void
}

; Invoke function of the capturing block: loads the captured pointer from the
; start of the block descriptor and stores 2 through it.
; Function Attrs: nounwind
define internal spir_func void @__enqueue_block_get_kernel_preferred_work_group_size_multiple_block_invoke(i8* nocapture readonly %.block_descriptor) #0 {
entry:
  %block.capture.addr = bitcast i8* %.block_descriptor to i32 addrspace(1)**
  %0 = load i32 addrspace(1)*, i32 addrspace(1)** %block.capture.addr, align 8
  store i32 2, i32 addrspace(1)* %0, align 4
  ret void
}

declare %opencl.block* @spir_block_bind(i8*, i32, i32, i8*)

declare spir_func i32 @_Z26get_kernel_work_group_sizeU13block_pointerFvvE(%opencl.block*) #1

declare spir_func i32 @_Z45get_kernel_preferred_work_group_size_multipleU13block_pointerFvvE(%opencl.block*) #1

declare spir_func %opencl.queue_t* @get_default_queue() #1

declare spir_func void @_Z10ndrange_1Dmm(%struct.ndrange_t* sret, i64, i64) #1

declare spir_func i32 @_Z14enqueue_kernel9ocl_queuei9ndrange_tU13block_pointerFvvE(%opencl.queue_t*, i32, %struct.ndrange_t* byval, %opencl.block*) #1

attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind }

!opencl.kernels = !{!0}
!opencl.enable.FP_CONTRACT = !{}
!opencl.spir.version = !{!6}
!opencl.ocl.version = !{!7}
!opencl.used.extensions = !{!8}
!opencl.used.optional.core.features = !{!8}
!opencl.compiler.options = !{!8}
!llvm.ident = !{!9}

!0 = !{void (i32 addrspace(1)*)* @enqueue_block_get_kernel_preferred_work_group_size_multiple, !1, !2, !3, !4, !5}
!1 = !{!"kernel_arg_addr_space", i32 1}
!2 = !{!"kernel_arg_access_qual", !"none"}
!3 = !{!"kernel_arg_type", !"int*"}
!4 = !{!"kernel_arg_base_type", !"int*"}
!5 = !{!"kernel_arg_type_qual", !""}
!6 = !{i32 1, i32 2}
!7 = !{i32 2, i32 0}
!8 = !{}
!9 = !{!"clang version 3.6.1 (https://github.com/KhronosGroup/SPIR.git 49a8b4a760d227b12116a79b2f7b2e34ef2e6879) (ssh://nnopencl-git-01.inn.intel.com/home/git/repo/opencl_qa-llvm d9b98710f905089caec167209da23af2e4f72bf0)"}