1// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -O0 -o - -triple=amdgcn-amd-amdhsa-amdgizcl | opt -instnamer -S | FileCheck %s 2 3// Also test serialization of atomic operations here, to avoid duplicating the test. 4// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-pch -O0 -o %t -triple=amdgcn-amd-amdhsa-amdgizcl 5// RUN: %clang_cc1 %s -cl-std=CL2.0 -include-pch %t -O0 -triple=amdgcn-amd-amdhsa-amdgizcl -emit-llvm -o - | opt -instnamer -S | FileCheck %s 6 7#ifndef ALREADY_INCLUDED 8#define ALREADY_INCLUDED 9 10typedef __INTPTR_TYPE__ intptr_t; 11typedef int int8 __attribute__((ext_vector_type(8))); 12 13typedef enum memory_order { 14 memory_order_relaxed = __ATOMIC_RELAXED, 15 memory_order_acquire = __ATOMIC_ACQUIRE, 16 memory_order_release = __ATOMIC_RELEASE, 17 memory_order_acq_rel = __ATOMIC_ACQ_REL, 18 memory_order_seq_cst = __ATOMIC_SEQ_CST 19} memory_order; 20 21typedef enum memory_scope { 22 memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM, 23 memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP, 24 memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE, 25 memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES, 26#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) 27 memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP 28#endif 29} memory_scope; 30 31atomic_int j; 32 33void fi1(atomic_int *i) { 34 // CHECK-LABEL: @fi1 35 // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst 36 int x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group); 37 38 // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("agent") seq_cst 39 x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_device); 40 41 // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} seq_cst 42 x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_all_svm_devices); 43 44 // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("wavefront") seq_cst 45 x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_sub_group); 46} 47 48void fi2(atomic_int *i) { 49 // CHECK-LABEL: @fi2 50 // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst 51 __opencl_atomic_store(i, 1, memory_order_seq_cst, memory_scope_work_group); 52} 53 54void test_addr(global atomic_int *ig, private atomic_int *ip, local atomic_int *il) { 55 // CHECK-LABEL: @test_addr 56 // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(1)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst 57 __opencl_atomic_store(ig, 1, memory_order_seq_cst, memory_scope_work_group); 58 59 // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(5)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst 60 __opencl_atomic_store(ip, 1, memory_order_seq_cst, memory_scope_work_group); 61 62 // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(3)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst 63 __opencl_atomic_store(il, 1, memory_order_seq_cst, memory_scope_work_group); 64} 65 66void fi3(atomic_int *i, atomic_uint *ui) { 67 // CHECK-LABEL: @fi3 68 // CHECK: atomicrmw and i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst 69 int x = __opencl_atomic_fetch_and(i, 1, memory_order_seq_cst, memory_scope_work_group); 70 71 // CHECK: atomicrmw min i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst 72 x = __opencl_atomic_fetch_min(i, 1, memory_order_seq_cst, memory_scope_work_group); 73 74 // CHECK: atomicrmw max i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst 75 x = __opencl_atomic_fetch_max(i, 1, memory_order_seq_cst, memory_scope_work_group); 76 77 // CHECK: atomicrmw umin i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst 78 x = __opencl_atomic_fetch_min(ui, 1, memory_order_seq_cst, memory_scope_work_group); 79 80 // CHECK: atomicrmw umax i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst 81 x = __opencl_atomic_fetch_max(ui, 1, memory_order_seq_cst, memory_scope_work_group); 82} 83 84bool fi4(atomic_int *i) { 85 // CHECK-LABEL: @fi4( 86 // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] syncscope("workgroup-one-as") acquire acquire 87 // CHECK: [[OLD:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 0 88 // CHECK: [[CMP:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 1 89 // CHECK: br i1 [[CMP]], label %[[STORE_EXPECTED:[.0-9A-Z_a-z]+]], label %[[CONTINUE:[.0-9A-Z_a-z]+]] 90 // CHECK: store i32 [[OLD]] 91 int cmp = 0; 92 return __opencl_atomic_compare_exchange_strong(i, &cmp, 1, memory_order_acquire, memory_order_acquire, memory_scope_work_group); 93} 94 95void fi5(atomic_int *i, int scope) { 96 // CHECK-LABEL: @fi5 97 // CHECK: switch i32 %{{.*}}, label %[[opencl_allsvmdevices:.*]] [ 98 // CHECK-NEXT: i32 1, label %[[opencl_workgroup:.*]] 99 // CHECK-NEXT: i32 2, label %[[opencl_device:.*]] 100 // CHECK-NEXT: i32 4, label %[[opencl_subgroup:.*]] 101 // CHECK-NEXT: ] 102 // CHECK: [[opencl_workgroup]]: 103 // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") seq_cst 104 // CHECK: br label %[[continue:.*]] 105 // CHECK: [[opencl_device]]: 106 // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") seq_cst 107 // CHECK: br label %[[continue]] 108 // CHECK: [[opencl_allsvmdevices]]: 109 // CHECK: load atomic i32, i32* %{{.*}} seq_cst 110 // CHECK: br label %[[continue]] 111 // CHECK: [[opencl_subgroup]]: 112 // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront") seq_cst 113 // CHECK: br label %[[continue]] 114 // CHECK: [[continue]]: 115 int x = __opencl_atomic_load(i, memory_order_seq_cst, scope); 116} 117 118void fi6(atomic_int *i, int order, int scope) { 119 // CHECK-LABEL: @fi6 120 // CHECK: switch i32 %{{.*}}, label %[[monotonic:.*]] [ 121 // CHECK-NEXT: i32 1, label %[[acquire:.*]] 122 // CHECK-NEXT: i32 2, label %[[acquire:.*]] 123 // CHECK-NEXT: i32 5, label %[[seqcst:.*]] 124 // CHECK-NEXT: ] 125 // CHECK: [[monotonic]]: 126 // CHECK: switch i32 %{{.*}}, label %[[MON_ALL:.*]] [ 127 // CHECK-NEXT: i32 1, label %[[MON_WG:.*]] 128 // CHECK-NEXT: i32 2, label %[[MON_DEV:.*]] 129 // CHECK-NEXT: i32 4, label %[[MON_SUB:.*]] 130 // CHECK-NEXT: ] 131 // CHECK: [[acquire]]: 132 // CHECK: switch i32 %{{.*}}, label %[[ACQ_ALL:.*]] [ 133 // CHECK-NEXT: i32 1, label %[[ACQ_WG:.*]] 134 // CHECK-NEXT: i32 2, label %[[ACQ_DEV:.*]] 135 // CHECK-NEXT: i32 4, label %[[ACQ_SUB:.*]] 136 // CHECK-NEXT: ] 137 // CHECK: [[seqcst]]: 138 // CHECK: switch i32 %{{.*}}, label %[[SEQ_ALL:.*]] [ 139 // CHECK-NEXT: i32 1, label %[[SEQ_WG:.*]] 140 // CHECK-NEXT: i32 2, label %[[SEQ_DEV:.*]] 141 // CHECK-NEXT: i32 4, label %[[SEQ_SUB:.*]] 142 // CHECK-NEXT: ] 143 // CHECK: [[MON_WG]]: 144 // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup-one-as") monotonic 145 // CHECK: [[MON_DEV]]: 146 // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent-one-as") monotonic 147 // CHECK: [[MON_ALL]]: 148 // CHECK: load atomic i32, i32* %{{.*}} monotonic 149 // CHECK: [[MON_SUB]]: 150 // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront-one-as") monotonic 151 // CHECK: [[ACQ_WG]]: 152 // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup-one-as") acquire 153 // CHECK: [[ACQ_DEV]]: 154 // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent-one-as") acquire 155 // CHECK: [[ACQ_ALL]]: 156 // CHECK: load atomic i32, i32* %{{.*}} acquire 157 // CHECK: [[ACQ_SUB]]: 158 // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront-one-as") acquire 159 // CHECK: [[SEQ_WG]]: 160 // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") seq_cst 161 // CHECK: [[SEQ_DEV]]: 162 // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") seq_cst 163 // CHECK: [[SEQ_ALL]]: 164 // CHECK: load atomic i32, i32* %{{.*}} seq_cst 165 // CHECK: [[SEQ_SUB]]: 166 // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront") seq_cst 167 int x = __opencl_atomic_load(i, order, scope); 168} 169 170float ff1(global atomic_float *d) { 171 // CHECK-LABEL: @ff1 172 // CHECK: load atomic i32, i32 addrspace(1)* {{.*}} syncscope("workgroup-one-as") monotonic 173 return __opencl_atomic_load(d, memory_order_relaxed, memory_scope_work_group); 174} 175 176void ff2(atomic_float *d) { 177 // CHECK-LABEL: @ff2 178 // CHECK: store atomic i32 {{.*}} syncscope("workgroup-one-as") release 179 __opencl_atomic_store(d, 1, memory_order_release, memory_scope_work_group); 180} 181 182float ff3(atomic_float *d) { 183 // CHECK-LABEL: @ff3 184 // CHECK: atomicrmw xchg i32* {{.*}} syncscope("workgroup") seq_cst 185 return __opencl_atomic_exchange(d, 2, memory_order_seq_cst, memory_scope_work_group); 186} 187 188// CHECK-LABEL: @atomic_init_foo 189void atomic_init_foo() 190{ 191 // CHECK-NOT: atomic 192 // CHECK: store 193 __opencl_atomic_init(&j, 42); 194 195 // CHECK-NOT: atomic 196 // CHECK: } 197} 198 199// CHECK-LABEL: @failureOrder 200void failureOrder(atomic_int *ptr, int *ptr2) { 201 // CHECK: cmpxchg i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup-one-as") acquire monotonic 202 __opencl_atomic_compare_exchange_strong(ptr, ptr2, 43, memory_order_acquire, memory_order_relaxed, memory_scope_work_group); 203 204 // CHECK: cmpxchg weak i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") seq_cst acquire 205 __opencl_atomic_compare_exchange_weak(ptr, ptr2, 43, memory_order_seq_cst, memory_order_acquire, memory_scope_work_group); 206} 207 208// CHECK-LABEL: @generalFailureOrder 209void generalFailureOrder(atomic_int *ptr, int *ptr2, int success, int fail) { 210 __opencl_atomic_compare_exchange_strong(ptr, ptr2, 42, success, fail, memory_scope_work_group); 211 // CHECK: switch i32 {{.*}}, label %[[MONOTONIC:[0-9a-zA-Z._]+]] [ 212 // CHECK-NEXT: i32 1, label %[[ACQUIRE:[0-9a-zA-Z._]+]] 213 // CHECK-NEXT: i32 2, label %[[ACQUIRE]] 214 // CHECK-NEXT: i32 3, label %[[RELEASE:[0-9a-zA-Z._]+]] 215 // CHECK-NEXT: i32 4, label %[[ACQREL:[0-9a-zA-Z._]+]] 216 // CHECK-NEXT: i32 5, label %[[SEQCST:[0-9a-zA-Z._]+]] 217 218 // CHECK: [[MONOTONIC]] 219 // CHECK: switch {{.*}}, label %[[MONOTONIC_MONOTONIC:[0-9a-zA-Z._]+]] [ 220 // CHECK-NEXT: ] 221 222 // CHECK: [[ACQUIRE]] 223 // CHECK: switch {{.*}}, label %[[ACQUIRE_MONOTONIC:[0-9a-zA-Z._]+]] [ 224 // CHECK-NEXT: i32 1, label %[[ACQUIRE_ACQUIRE:[0-9a-zA-Z._]+]] 225 // CHECK-NEXT: i32 2, label %[[ACQUIRE_ACQUIRE:[0-9a-zA-Z._]+]] 226 // CHECK-NEXT: ] 227 228 // CHECK: [[RELEASE]] 229 // CHECK: switch {{.*}}, label %[[RELEASE_MONOTONIC:[0-9a-zA-Z._]+]] [ 230 // CHECK-NEXT: ] 231 232 // CHECK: [[ACQREL]] 233 // CHECK: switch {{.*}}, label %[[ACQREL_MONOTONIC:[0-9a-zA-Z._]+]] [ 234 // CHECK-NEXT: i32 1, label %[[ACQREL_ACQUIRE:[0-9a-zA-Z._]+]] 235 // CHECK-NEXT: i32 2, label %[[ACQREL_ACQUIRE:[0-9a-zA-Z._]+]] 236 // CHECK-NEXT: ] 237 238 // CHECK: [[SEQCST]] 239 // CHECK: switch {{.*}}, label %[[SEQCST_MONOTONIC:[0-9a-zA-Z._]+]] [ 240 // CHECK-NEXT: i32 1, label %[[SEQCST_ACQUIRE:[0-9a-zA-Z._]+]] 241 // CHECK-NEXT: i32 2, label %[[SEQCST_ACQUIRE:[0-9a-zA-Z._]+]] 242 // CHECK-NEXT: i32 5, label %[[SEQCST_SEQCST:[0-9a-zA-Z._]+]] 243 // CHECK-NEXT: ] 244 245 // CHECK: [[MONOTONIC_MONOTONIC]] 246 // CHECK: cmpxchg {{.*}} monotonic monotonic 247 // CHECK: br 248 249 // CHECK: [[ACQUIRE_MONOTONIC]] 250 // CHECK: cmpxchg {{.*}} acquire monotonic 251 // CHECK: br 252 253 // CHECK: [[ACQUIRE_ACQUIRE]] 254 // CHECK: cmpxchg {{.*}} acquire acquire 255 // CHECK: br 256 257 // CHECK: [[ACQREL_MONOTONIC]] 258 // CHECK: cmpxchg {{.*}} acq_rel monotonic 259 // CHECK: br 260 261 // CHECK: [[ACQREL_ACQUIRE]] 262 // CHECK: cmpxchg {{.*}} acq_rel acquire 263 // CHECK: br 264 265 // CHECK: [[SEQCST_MONOTONIC]] 266 // CHECK: cmpxchg {{.*}} seq_cst monotonic 267 // CHECK: br 268 269 // CHECK: [[SEQCST_ACQUIRE]] 270 // CHECK: cmpxchg {{.*}} seq_cst acquire 271 // CHECK: br 272 273 // CHECK: [[SEQCST_SEQCST]] 274 // CHECK: cmpxchg {{.*}} seq_cst seq_cst 275 // CHECK: br 276} 277 278int test_volatile(volatile atomic_int *i) { 279 // CHECK-LABEL: @test_volatile 280 // CHECK: %[[i_addr:.*]] = alloca i32 281 // CHECK-NEXT: %[[atomicdst:.*]] = alloca i32 282 // CHECK-NEXT: store i32* %i, i32* addrspace(5)* %[[i_addr]] 283 // CHECK-NEXT: %[[addr:.*]] = load i32*, i32* addrspace(5)* %[[i_addr]] 284 // CHECK-NEXT: %[[res:.*]] = load atomic volatile i32, i32* %[[addr]] syncscope("workgroup") seq_cst 285 // CHECK-NEXT: store i32 %[[res]], i32 addrspace(5)* %[[atomicdst]] 286 // CHECK-NEXT: %[[retval:.*]] = load i32, i32 addrspace(5)* %[[atomicdst]] 287 // CHECK-NEXT: ret i32 %[[retval]] 288 return __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group); 289} 290 291#endif 292