// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown-opencl -S -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s

// Verifies the alignment clang assigns to OpenCL local (addrspace(3)) and
// private (addrspace(5)) arrays of scalar and ext_vector_type elements when
// targeting amdgcn. The expectations below show every element type aligned to
// its own size, with 3-element vectors aligned and stored like 4-element ones.

#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#pragma OPENCL EXTENSION cl_khr_fp16 : enable

typedef char __attribute__((ext_vector_type(2))) char2;
typedef char __attribute__((ext_vector_type(3))) char3;
typedef char __attribute__((ext_vector_type(4))) char4;
typedef char __attribute__((ext_vector_type(8))) char8;
typedef char __attribute__((ext_vector_type(16))) char16;

typedef short __attribute__((ext_vector_type(2))) short2;
typedef short __attribute__((ext_vector_type(3))) short3;
typedef short __attribute__((ext_vector_type(4))) short4;
typedef short __attribute__((ext_vector_type(8))) short8;
typedef short __attribute__((ext_vector_type(16))) short16;

typedef int __attribute__((ext_vector_type(2))) int2;
typedef int __attribute__((ext_vector_type(3))) int3;
typedef int __attribute__((ext_vector_type(4))) int4;
typedef int __attribute__((ext_vector_type(8))) int8;
typedef int __attribute__((ext_vector_type(16))) int16;

typedef long __attribute__((ext_vector_type(2))) long2;
typedef long __attribute__((ext_vector_type(3))) long3;
typedef long __attribute__((ext_vector_type(4))) long4;
typedef long __attribute__((ext_vector_type(8))) long8;
typedef long __attribute__((ext_vector_type(16))) long16;

typedef half __attribute__((ext_vector_type(2))) half2;
typedef half __attribute__((ext_vector_type(3))) half3;
typedef half __attribute__((ext_vector_type(4))) half4;
typedef half __attribute__((ext_vector_type(8))) half8;
typedef half __attribute__((ext_vector_type(16))) half16;

typedef float __attribute__((ext_vector_type(2))) float2;
typedef float __attribute__((ext_vector_type(3))) float3;
typedef float __attribute__((ext_vector_type(4))) float4;
typedef float __attribute__((ext_vector_type(8))) float8;
typedef float __attribute__((ext_vector_type(16))) float16;

typedef double __attribute__((ext_vector_type(2))) double2;
typedef double __attribute__((ext_vector_type(3))) double3;
typedef double __attribute__((ext_vector_type(4))) double4;
typedef double __attribute__((ext_vector_type(8))) double8;
typedef double __attribute__((ext_vector_type(16))) double16;

// Expected module-level definitions for the function-scope local arrays of
// local_memory_alignment_global: one internal addrspace(3) global per array.
// CHECK: @local_memory_alignment_global.lds_i8 = internal addrspace(3) global [4 x i8] undef, align 1
// CHECK: @local_memory_alignment_global.lds_v2i8 = internal addrspace(3) global [4 x <2 x i8>] undef, align 2
// CHECK: @local_memory_alignment_global.lds_v3i8 = internal addrspace(3) global [4 x <3 x i8>] undef, align 4
// CHECK: @local_memory_alignment_global.lds_v4i8 = internal addrspace(3) global [4 x <4 x i8>] undef, align 4
// CHECK: @local_memory_alignment_global.lds_v8i8 = internal addrspace(3) global [4 x <8 x i8>] undef, align 8
// CHECK: @local_memory_alignment_global.lds_v16i8 = internal addrspace(3) global [4 x <16 x i8>] undef, align 16
// CHECK: @local_memory_alignment_global.lds_i16 = internal addrspace(3) global [4 x i16] undef, align 2
// CHECK: @local_memory_alignment_global.lds_v2i16 = internal addrspace(3) global [4 x <2 x i16>] undef, align 4
// CHECK: @local_memory_alignment_global.lds_v3i16 = internal addrspace(3) global [4 x <3 x i16>] undef, align 8
// CHECK: @local_memory_alignment_global.lds_v4i16 = internal addrspace(3) global [4 x <4 x i16>] undef, align 8
// CHECK: @local_memory_alignment_global.lds_v8i16 = internal addrspace(3) global [4 x <8 x i16>] undef, align 16
// CHECK: @local_memory_alignment_global.lds_v16i16 = internal addrspace(3) global [4 x <16 x i16>] undef, align 32
// CHECK: @local_memory_alignment_global.lds_i32 = internal addrspace(3) global [4 x i32] undef, align 4
// CHECK: @local_memory_alignment_global.lds_v2i32 = internal addrspace(3) global [4 x <2 x i32>] undef, align 8
// CHECK: @local_memory_alignment_global.lds_v3i32 = internal addrspace(3) global [4 x <3 x i32>] undef, align 16
// CHECK: @local_memory_alignment_global.lds_v4i32 = internal addrspace(3) global [4 x <4 x i32>] undef, align 16
// CHECK: @local_memory_alignment_global.lds_v8i32 = internal addrspace(3) global [4 x <8 x i32>] undef, align 32
// CHECK: @local_memory_alignment_global.lds_v16i32 = internal addrspace(3) global [4 x <16 x i32>] undef, align 64
// CHECK: @local_memory_alignment_global.lds_i64 = internal addrspace(3) global [4 x i64] undef, align 8
// CHECK: @local_memory_alignment_global.lds_v2i64 = internal addrspace(3) global [4 x <2 x i64>] undef, align 16
// CHECK: @local_memory_alignment_global.lds_v3i64 = internal addrspace(3) global [4 x <3 x i64>] undef, align 32
// CHECK: @local_memory_alignment_global.lds_v4i64 = internal addrspace(3) global [4 x <4 x i64>] undef, align 32
// CHECK: @local_memory_alignment_global.lds_v8i64 = internal addrspace(3) global [4 x <8 x i64>] undef, align 64
// CHECK: @local_memory_alignment_global.lds_v16i64 = internal addrspace(3) global [4 x <16 x i64>] undef, align 128
// CHECK: @local_memory_alignment_global.lds_f16 = internal addrspace(3) global [4 x half] undef, align 2
// CHECK: @local_memory_alignment_global.lds_v2f16 = internal addrspace(3) global [4 x <2 x half>] undef, align 4
// CHECK: @local_memory_alignment_global.lds_v3f16 = internal addrspace(3) global [4 x <3 x half>] undef, align 8
// CHECK: @local_memory_alignment_global.lds_v4f16 = internal addrspace(3) global [4 x <4 x half>] undef, align 8
// CHECK: @local_memory_alignment_global.lds_v8f16 = internal addrspace(3) global [4 x <8 x half>] undef, align 16
// CHECK: @local_memory_alignment_global.lds_v16f16 = internal addrspace(3) global [4 x <16 x half>] undef, align 32
// CHECK: @local_memory_alignment_global.lds_f32 = internal addrspace(3) global [4 x float] undef, align 4
// CHECK: @local_memory_alignment_global.lds_v2f32 = internal addrspace(3) global [4 x <2 x float>] undef, align 8
// CHECK: @local_memory_alignment_global.lds_v3f32 = internal addrspace(3) global [4 x <3 x float>] undef, align 16
// CHECK: @local_memory_alignment_global.lds_v4f32 = internal addrspace(3) global [4 x <4 x float>] undef, align 16
// CHECK: @local_memory_alignment_global.lds_v8f32 = internal addrspace(3) global [4 x <8 x float>] undef, align 32
// CHECK: @local_memory_alignment_global.lds_v16f32 = internal addrspace(3) global [4 x <16 x float>] undef, align 64
// CHECK: @local_memory_alignment_global.lds_f64 = internal addrspace(3) global [4 x double] undef, align 8
// CHECK: @local_memory_alignment_global.lds_v2f64 = internal addrspace(3) global [4 x <2 x double>] undef, align 16
// CHECK: @local_memory_alignment_global.lds_v3f64 = internal addrspace(3) global [4 x <3 x double>] undef, align 32
// CHECK: @local_memory_alignment_global.lds_v4f64 = internal addrspace(3) global [4 x <4 x double>] undef, align 32
// CHECK: @local_memory_alignment_global.lds_v8f64 = internal addrspace(3) global [4 x <8 x double>] undef, align 64
// CHECK: @local_memory_alignment_global.lds_v16f64 = internal addrspace(3) global [4 x <16 x double>] undef, align 128


// Expected stores to element 0 of each local array, in source order. The
// 3-element vector stores are expected as 4-element stores with an undef lane.
// CHECK-LABEL: @local_memory_alignment_global(
// CHECK: store volatile i8 0, i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @local_memory_alignment_global.lds_i8, i64 0, i64 0), align 1
// CHECK: store volatile <2 x i8> zeroinitializer, <2 x i8> addrspace(3)* getelementptr inbounds ([4 x <2 x i8>], [4 x <2 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v2i8, i64 0, i64 0), align 2
// CHECK: store volatile <4 x i8> <i8 0, i8 0, i8 0, i8 undef>, <4 x i8> addrspace(3)* bitcast ([4 x <3 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v3i8 to <4 x i8> addrspace(3)*), align 4
// CHECK: store volatile <4 x i8> zeroinitializer, <4 x i8> addrspace(3)* getelementptr inbounds ([4 x <4 x i8>], [4 x <4 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v4i8, i64 0, i64 0), align 4
// CHECK: store volatile <8 x i8> zeroinitializer, <8 x i8> addrspace(3)* getelementptr inbounds ([4 x <8 x i8>], [4 x <8 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v8i8, i64 0, i64 0), align 8
// CHECK: store volatile <16 x i8> zeroinitializer, <16 x i8> addrspace(3)* getelementptr inbounds ([4 x <16 x i8>], [4 x <16 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v16i8, i64 0, i64 0), align 16
// CHECK: store volatile i16 0, i16 addrspace(3)* getelementptr inbounds ([4 x i16], [4 x i16] addrspace(3)* @local_memory_alignment_global.lds_i16, i64 0, i64 0), align 2
// CHECK: store volatile <2 x i16> zeroinitializer, <2 x i16> addrspace(3)* getelementptr inbounds ([4 x <2 x i16>], [4 x <2 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v2i16, i64 0, i64 0), align 4
// CHECK: store volatile <4 x i16> <i16 0, i16 0, i16 0, i16 undef>, <4 x i16> addrspace(3)* bitcast ([4 x <3 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v3i16 to <4 x i16> addrspace(3)*), align 8
// CHECK: store volatile <4 x i16> zeroinitializer, <4 x i16> addrspace(3)* getelementptr inbounds ([4 x <4 x i16>], [4 x <4 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v4i16, i64 0, i64 0), align 8
// CHECK: store volatile <8 x i16> zeroinitializer, <8 x i16> addrspace(3)* getelementptr inbounds ([4 x <8 x i16>], [4 x <8 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v8i16, i64 0, i64 0), align 16
// CHECK: store volatile <16 x i16> zeroinitializer, <16 x i16> addrspace(3)* getelementptr inbounds ([4 x <16 x i16>], [4 x <16 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v16i16, i64 0, i64 0), align 32
// CHECK: store volatile i32 0, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @local_memory_alignment_global.lds_i32, i64 0, i64 0), align 4
// CHECK: store volatile <2 x i32> zeroinitializer, <2 x i32> addrspace(3)* getelementptr inbounds ([4 x <2 x i32>], [4 x <2 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v2i32, i64 0, i64 0), align 8
// CHECK: store volatile <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, <4 x i32> addrspace(3)* bitcast ([4 x <3 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v3i32 to <4 x i32> addrspace(3)*), align 16
// CHECK: store volatile <4 x i32> zeroinitializer, <4 x i32> addrspace(3)* getelementptr inbounds ([4 x <4 x i32>], [4 x <4 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v4i32, i64 0, i64 0), align 16
// CHECK: store volatile <8 x i32> zeroinitializer, <8 x i32> addrspace(3)* getelementptr inbounds ([4 x <8 x i32>], [4 x <8 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v8i32, i64 0, i64 0), align 32
// CHECK: store volatile <16 x i32> zeroinitializer, <16 x i32> addrspace(3)* getelementptr inbounds ([4 x <16 x i32>], [4 x <16 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v16i32, i64 0, i64 0), align 64
// CHECK: store volatile i64 0, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @local_memory_alignment_global.lds_i64, i64 0, i64 0), align 8
// CHECK: store volatile <2 x i64> zeroinitializer, <2 x i64> addrspace(3)* getelementptr inbounds ([4 x <2 x i64>], [4 x <2 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v2i64, i64 0, i64 0), align 16
// CHECK: store volatile <4 x i64> <i64 0, i64 0, i64 0, i64 undef>, <4 x i64> addrspace(3)* bitcast ([4 x <3 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v3i64 to <4 x i64> addrspace(3)*), align 32
// CHECK: store volatile <4 x i64> zeroinitializer, <4 x i64> addrspace(3)* getelementptr inbounds ([4 x <4 x i64>], [4 x <4 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v4i64, i64 0, i64 0), align 32
// CHECK: store volatile <8 x i64> zeroinitializer, <8 x i64> addrspace(3)* getelementptr inbounds ([4 x <8 x i64>], [4 x <8 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v8i64, i64 0, i64 0), align 64
// CHECK: store volatile <16 x i64> zeroinitializer, <16 x i64> addrspace(3)* getelementptr inbounds ([4 x <16 x i64>], [4 x <16 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v16i64, i64 0, i64 0), align 128
// CHECK: store volatile half 0xH0000, half addrspace(3)* getelementptr inbounds ([4 x half], [4 x half] addrspace(3)* @local_memory_alignment_global.lds_f16, i64 0, i64 0), align 2
// CHECK: store volatile <2 x half> zeroinitializer, <2 x half> addrspace(3)* getelementptr inbounds ([4 x <2 x half>], [4 x <2 x half>] addrspace(3)* @local_memory_alignment_global.lds_v2f16, i64 0, i64 0), align 4
// CHECK: store volatile <4 x half> <half 0xH0000, half 0xH0000, half 0xH0000, half undef>, <4 x half> addrspace(3)* bitcast ([4 x <3 x half>] addrspace(3)* @local_memory_alignment_global.lds_v3f16 to <4 x half> addrspace(3)*), align 8
// CHECK: store volatile <4 x half> zeroinitializer, <4 x half> addrspace(3)* getelementptr inbounds ([4 x <4 x half>], [4 x <4 x half>] addrspace(3)* @local_memory_alignment_global.lds_v4f16, i64 0, i64 0), align 8
// CHECK: store volatile <8 x half> zeroinitializer, <8 x half> addrspace(3)* getelementptr inbounds ([4 x <8 x half>], [4 x <8 x half>] addrspace(3)* @local_memory_alignment_global.lds_v8f16, i64 0, i64 0), align 16
// CHECK: store volatile <16 x half> zeroinitializer, <16 x half> addrspace(3)* getelementptr inbounds ([4 x <16 x half>], [4 x <16 x half>] addrspace(3)* @local_memory_alignment_global.lds_v16f16, i64 0, i64 0), align 32
// CHECK: store volatile float 0.000000e+00, float addrspace(3)* getelementptr inbounds ([4 x float], [4 x float] addrspace(3)* @local_memory_alignment_global.lds_f32, i64 0, i64 0), align 4
// CHECK: store volatile <2 x float> zeroinitializer, <2 x float> addrspace(3)* getelementptr inbounds ([4 x <2 x float>], [4 x <2 x float>] addrspace(3)* @local_memory_alignment_global.lds_v2f32, i64 0, i64 0), align 8
// CHECK: store volatile <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, <4 x float> addrspace(3)* bitcast ([4 x <3 x float>] addrspace(3)* @local_memory_alignment_global.lds_v3f32 to <4 x float> addrspace(3)*), align 16
// CHECK: store volatile <4 x float> zeroinitializer, <4 x float> addrspace(3)* getelementptr inbounds ([4 x <4 x float>], [4 x <4 x float>] addrspace(3)* @local_memory_alignment_global.lds_v4f32, i64 0, i64 0), align 16
// CHECK: store volatile <8 x float> zeroinitializer, <8 x float> addrspace(3)* getelementptr inbounds ([4 x <8 x float>], [4 x <8 x float>] addrspace(3)* @local_memory_alignment_global.lds_v8f32, i64 0, i64 0), align 32
// CHECK: store volatile <16 x float> zeroinitializer, <16 x float> addrspace(3)* getelementptr inbounds ([4 x <16 x float>], [4 x <16 x float>] addrspace(3)* @local_memory_alignment_global.lds_v16f32, i64 0, i64 0), align 64
// CHECK: store volatile double 0.000000e+00, double addrspace(3)* getelementptr inbounds ([4 x double], [4 x double] addrspace(3)* @local_memory_alignment_global.lds_f64, i64 0, i64 0), align 8
// CHECK: store volatile <2 x double> zeroinitializer, <2 x double> addrspace(3)* getelementptr inbounds ([4 x <2 x double>], [4 x <2 x double>] addrspace(3)* @local_memory_alignment_global.lds_v2f64, i64 0, i64 0), align 16
// CHECK: store volatile <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double undef>, <4 x double> addrspace(3)* bitcast ([4 x <3 x double>] addrspace(3)* @local_memory_alignment_global.lds_v3f64 to <4 x double> addrspace(3)*), align 32
// CHECK: store volatile <4 x double> zeroinitializer, <4 x double> addrspace(3)* getelementptr inbounds ([4 x <4 x double>], [4 x <4 x double>] addrspace(3)* @local_memory_alignment_global.lds_v4f64, i64 0, i64 0), align 32
// CHECK: store volatile <8 x double> zeroinitializer, <8 x double> addrspace(3)* getelementptr inbounds ([4 x <8 x double>], [4 x <8 x double>] addrspace(3)* @local_memory_alignment_global.lds_v8f64, i64 0, i64 0), align 64
// CHECK: store volatile <16 x double> zeroinitializer, <16 x double> addrspace(3)* getelementptr inbounds ([4 x <16 x double>], [4 x <16 x double>] addrspace(3)* @local_memory_alignment_global.lds_v16f64, i64 0, i64 0), align 128
kernel void local_memory_alignment_global()
{
  volatile local char lds_i8[4];
  volatile local char2 lds_v2i8[4];
  volatile local char3 lds_v3i8[4];
  volatile local char4 lds_v4i8[4];
  volatile local char8 lds_v8i8[4];
  volatile local char16 lds_v16i8[4];

  volatile local short lds_i16[4];
  volatile local short2 lds_v2i16[4];
  volatile local short3 lds_v3i16[4];
  volatile local short4 lds_v4i16[4];
  volatile local short8 lds_v8i16[4];
  volatile local short16 lds_v16i16[4];

  volatile local int lds_i32[4];
  volatile local int2 lds_v2i32[4];
  volatile local int3 lds_v3i32[4];
  volatile local int4 lds_v4i32[4];
  volatile local int8 lds_v8i32[4];
  volatile local int16 lds_v16i32[4];

  volatile local long lds_i64[4];
  volatile local long2 lds_v2i64[4];
  volatile local long3 lds_v3i64[4];
  volatile local long4 lds_v4i64[4];
  volatile local long8 lds_v8i64[4];
  volatile local long16 lds_v16i64[4];

  volatile local half lds_f16[4];
  volatile local half2 lds_v2f16[4];
  volatile local half3 lds_v3f16[4];
  volatile local half4 lds_v4f16[4];
  volatile local half8 lds_v8f16[4];
  volatile local half16 lds_v16f16[4];

  volatile local float lds_f32[4];
  volatile local float2 lds_v2f32[4];
  volatile local float3 lds_v3f32[4];
  volatile local float4 lds_v4f32[4];
  volatile local float8 lds_v8f32[4];
  volatile local float16 lds_v16f32[4];

  volatile local double lds_f64[4];
  volatile local double2 lds_v2f64[4];
  volatile local double3 lds_v3f64[4];
  volatile local double4 lds_v4f64[4];
  volatile local double8 lds_v8f64[4];
  volatile local double16 lds_v16f64[4];

  // One volatile store per array; the order must stay in sync with the
  // expected store sequence listed above this kernel.
  *lds_i8 = 0;
  *lds_v2i8 = 0;
  *lds_v3i8 = 0;
  *lds_v4i8 = 0;
  *lds_v8i8 = 0;
  *lds_v16i8 = 0;

  *lds_i16 = 0;
  *lds_v2i16 = 0;
  *lds_v3i16 = 0;
  *lds_v4i16 = 0;
  *lds_v8i16 = 0;
  *lds_v16i16 = 0;

  *lds_i32 = 0;
  *lds_v2i32 = 0;
  *lds_v3i32 = 0;
  *lds_v4i32 = 0;
  *lds_v8i32 = 0;
  *lds_v16i32 = 0;

  *lds_i64 = 0;
  *lds_v2i64 = 0;
  *lds_v3i64 = 0;
  *lds_v4i64 = 0;
  *lds_v8i64 = 0;
  *lds_v16i64 = 0;

  *lds_f16 = 0;
  *lds_v2f16 = 0;
  *lds_v3f16 = 0;
  *lds_v4f16 = 0;
  *lds_v8f16 = 0;
  *lds_v16f16 = 0;

  *lds_f32 = 0;
  *lds_v2f32 = 0;
  *lds_v3f32 = 0;
  *lds_v4f32 = 0;
  *lds_v8f32 = 0;
  *lds_v16f32 = 0;

  *lds_f64 = 0;
  *lds_v2f64 = 0;
  *lds_v3f64 = 0;
  *lds_v4f64 = 0;
  *lds_v8f64 = 0;
  *lds_v16f64 = 0;
}

// Same stores as above, but through local pointers received as kernel
// arguments. NOTE(review): no FileCheck pattern in this file names this
// kernel, so only successful compilation of it is exercised here.
kernel void local_memory_alignment_arg(
  volatile local char* lds_i8,
  volatile local char2* lds_v2i8,
  volatile local char3* lds_v3i8,
  volatile local char4* lds_v4i8,
  volatile local char8* lds_v8i8,
  volatile local char16* lds_v16i8,

  volatile local short* lds_i16,
  volatile local short2* lds_v2i16,
  volatile local short3* lds_v3i16,
  volatile local short4* lds_v4i16,
  volatile local short8* lds_v8i16,
  volatile local short16* lds_v16i16,

  volatile local int* lds_i32,
  volatile local int2* lds_v2i32,
  volatile local int3* lds_v3i32,
  volatile local int4* lds_v4i32,
  volatile local int8* lds_v8i32,
  volatile local int16* lds_v16i32,

  volatile local long* lds_i64,
  volatile local long2* lds_v2i64,
  volatile local long3* lds_v3i64,
  volatile local long4* lds_v4i64,
  volatile local long8* lds_v8i64,
  volatile local long16* lds_v16i64,

  volatile local half* lds_f16,
  volatile local half2* lds_v2f16,
  volatile local half3* lds_v3f16,
  volatile local half4* lds_v4f16,
  volatile local half8* lds_v8f16,
  volatile local half16* lds_v16f16,

  volatile local float* lds_f32,
  volatile local float2* lds_v2f32,
  volatile local float3* lds_v3f32,
  volatile local float4* lds_v4f32,
  volatile local float8* lds_v8f32,
  volatile local float16* lds_v16f32,

  volatile local double* lds_f64,
  volatile local double2* lds_v2f64,
  volatile local double3* lds_v3f64,
  volatile local double4* lds_v4f64,
  volatile local double8* lds_v8f64,
  volatile local double16* lds_v16f64)
{
  *lds_i8 = 0;
  *lds_v2i8 = 0;
  *lds_v3i8 = 0;
  *lds_v4i8 = 0;
  *lds_v8i8 = 0;
  *lds_v16i8 = 0;

  *lds_i16 = 0;
  *lds_v2i16 = 0;
  *lds_v3i16 = 0;
  *lds_v4i16 = 0;
  *lds_v8i16 = 0;
  *lds_v16i16 = 0;

  *lds_i32 = 0;
  *lds_v2i32 = 0;
  *lds_v3i32 = 0;
  *lds_v4i32 = 0;
  *lds_v8i32 = 0;
  *lds_v16i32 = 0;

  *lds_i64 = 0;
  *lds_v2i64 = 0;
  *lds_v3i64 = 0;
  *lds_v4i64 = 0;
  *lds_v8i64 = 0;
  *lds_v16i64 = 0;

  *lds_f16 = 0;
  *lds_v2f16 = 0;
  *lds_v3f16 = 0;
  *lds_v4f16 = 0;
  *lds_v8f16 = 0;
  *lds_v16f16 = 0;

  *lds_f32 = 0;
  *lds_v2f32 = 0;
  *lds_v3f32 = 0;
  *lds_v4f32 = 0;
  *lds_v8f32 = 0;
  *lds_v16f32 = 0;

  *lds_f64 = 0;
  *lds_v2f64 = 0;
  *lds_v3f64 = 0;
  *lds_v4f64 = 0;
  *lds_v8f64 = 0;
  *lds_v16f64 = 0;
}

// Expected addrspace(5) allocas for the private arrays, in declaration order.
// CHECK-LABEL: @private_memory_alignment_alloca(
// CHECK: %private_i8 = alloca [4 x i8], align 1, addrspace(5)
// CHECK: %private_v2i8 = alloca [4 x <2 x i8>], align 2, addrspace(5)
// CHECK: %private_v3i8 = alloca [4 x <3 x i8>], align 4, addrspace(5)
// CHECK: %private_v4i8 = alloca [4 x <4 x i8>], align 4, addrspace(5)
// CHECK: %private_v8i8 = alloca [4 x <8 x i8>], align 8, addrspace(5)
// CHECK: %private_v16i8 = alloca [4 x <16 x i8>], align 16, addrspace(5)
// CHECK: %private_i16 = alloca [4 x i16], align 2, addrspace(5)
// CHECK: %private_v2i16 = alloca [4 x <2 x i16>], align 4, addrspace(5)
// CHECK: %private_v3i16 = alloca [4 x <3 x i16>], align 8, addrspace(5)
// CHECK: %private_v4i16 = alloca [4 x <4 x i16>], align 8, addrspace(5)
// CHECK: %private_v8i16 = alloca [4 x <8 x i16>], align 16, addrspace(5)
// CHECK: %private_v16i16 = alloca [4 x <16 x i16>], align 32, addrspace(5)
// CHECK: %private_i32 = alloca [4 x i32], align 4, addrspace(5)
// CHECK: %private_v2i32 = alloca [4 x <2 x i32>], align 8, addrspace(5)
// CHECK: %private_v3i32 = alloca [4 x <3 x i32>], align 16, addrspace(5)
// CHECK: %private_v4i32 = alloca [4 x <4 x i32>], align 16, addrspace(5)
// CHECK: %private_v8i32 = alloca [4 x <8 x i32>], align 32, addrspace(5)
// CHECK: %private_v16i32 = alloca [4 x <16 x i32>], align 64, addrspace(5)
// CHECK: %private_i64 = alloca [4 x i64], align 8, addrspace(5)
// CHECK: %private_v2i64 = alloca [4 x <2 x i64>], align 16, addrspace(5)
// CHECK: %private_v3i64 = alloca [4 x <3 x i64>], align 32, addrspace(5)
// CHECK: %private_v4i64 = alloca [4 x <4 x i64>], align 32, addrspace(5)
// CHECK: %private_v8i64 = alloca [4 x <8 x i64>], align 64, addrspace(5)
// CHECK: %private_v16i64 = alloca [4 x <16 x i64>], align 128, addrspace(5)
// CHECK: %private_f16 = alloca [4 x half], align 2, addrspace(5)
// CHECK: %private_v2f16 = alloca [4 x <2 x half>], align 4, addrspace(5)
// CHECK: %private_v3f16 = alloca [4 x <3 x half>], align 8, addrspace(5)
// CHECK: %private_v4f16 = alloca [4 x <4 x half>], align 8, addrspace(5)
// CHECK: %private_v8f16 = alloca [4 x <8 x half>], align 16, addrspace(5)
// CHECK: %private_v16f16 = alloca [4 x <16 x half>], align 32, addrspace(5)
// CHECK: %private_f32 = alloca [4 x float], align 4, addrspace(5)
// CHECK: %private_v2f32 = alloca [4 x <2 x float>], align 8, addrspace(5)
// CHECK: %private_v3f32 = alloca [4 x <3 x float>], align 16, addrspace(5)
// CHECK: %private_v4f32 = alloca [4 x <4 x float>], align 16, addrspace(5)
// CHECK: %private_v8f32 = alloca [4 x <8 x float>], align 32, addrspace(5)
// CHECK: %private_v16f32 = alloca [4 x <16 x float>], align 64, addrspace(5)
// CHECK: %private_f64 = alloca [4 x double], align 8, addrspace(5)
// CHECK: %private_v2f64 = alloca [4 x <2 x double>], align 16, addrspace(5)
// CHECK: %private_v3f64 = alloca [4 x <3 x double>], align 32, addrspace(5)
// CHECK: %private_v4f64 = alloca [4 x <4 x double>], align 32, addrspace(5)
// CHECK: %private_v8f64 = alloca [4 x <8 x double>], align 64, addrspace(5)
// CHECK: %private_v16f64 = alloca [4 x <16 x double>], align 128, addrspace(5)

// Expected stores to element 0 of each private array. Numbered temporaries
// are matched with a digit regex rather than a fixed number, so the patterns
// do not depend on the exact SSA value numbering.
// CHECK: store volatile i8 0, i8 addrspace(5)* %arraydecay, align 1
// CHECK: store volatile <2 x i8> zeroinitializer, <2 x i8> addrspace(5)* %arraydecay{{[0-9]+}}, align 2
// CHECK: store volatile <4 x i8> <i8 0, i8 0, i8 0, i8 undef>, <4 x i8> addrspace(5)* %storetmp, align 4
// CHECK: store volatile <4 x i8> zeroinitializer, <4 x i8> addrspace(5)* %arraydecay{{[0-9]+}}, align 4
// CHECK: store volatile <8 x i8> zeroinitializer, <8 x i8> addrspace(5)* %arraydecay{{[0-9]+}}, align 8
// CHECK: store volatile <16 x i8> zeroinitializer, <16 x i8> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
// CHECK: store volatile i16 0, i16 addrspace(5)* %arraydecay{{[0-9]+}}, align 2
// CHECK: store volatile <2 x i16> zeroinitializer, <2 x i16> addrspace(5)* %arraydecay{{[0-9]+}}, align 4
// CHECK: store volatile <4 x i16> <i16 0, i16 0, i16 0, i16 undef>, <4 x i16> addrspace(5)* %storetmp{{[0-9]+}}, align 8
// CHECK: store volatile <4 x i16> zeroinitializer, <4 x i16> addrspace(5)* %arraydecay{{[0-9]+}}, align 8
// CHECK: store volatile <8 x i16> zeroinitializer, <8 x i16> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
// CHECK: store volatile <16 x i16> zeroinitializer, <16 x i16> addrspace(5)* %arraydecay{{[0-9]+}}, align 32
// CHECK: store volatile i32 0, i32 addrspace(5)* %arraydecay{{[0-9]+}}, align 4
// CHECK: store volatile <2 x i32> zeroinitializer, <2 x i32> addrspace(5)* %arraydecay{{[0-9]+}}, align 8
// CHECK: store volatile <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, <4 x i32> addrspace(5)* %storetmp{{[0-9]+}}, align 16
// CHECK: store volatile <4 x i32> zeroinitializer, <4 x i32> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
// CHECK: store volatile <8 x i32> zeroinitializer, <8 x i32> addrspace(5)* %arraydecay{{[0-9]+}}, align 32
// CHECK: store volatile <16 x i32> zeroinitializer, <16 x i32> addrspace(5)* %arraydecay{{[0-9]+}}, align 64
// CHECK: store volatile i64 0, i64 addrspace(5)* %arraydecay{{[0-9]+}}, align 8
// CHECK: store volatile <2 x i64> zeroinitializer, <2 x i64> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
// CHECK: store volatile <4 x i64> <i64 0, i64 0, i64 0, i64 undef>, <4 x i64> addrspace(5)* %storetmp{{[0-9]+}}, align 32
// CHECK: store volatile <4 x i64> zeroinitializer, <4 x i64> addrspace(5)* %arraydecay{{[0-9]+}}, align 32
// CHECK: store volatile <8 x i64> zeroinitializer, <8 x i64> addrspace(5)* %arraydecay{{[0-9]+}}, align 64
// CHECK: store volatile <16 x i64> zeroinitializer, <16 x i64> addrspace(5)* %arraydecay{{[0-9]+}}, align 128
// CHECK: store volatile half 0xH0000, half addrspace(5)* %arraydecay{{[0-9]+}}, align 2
// CHECK: store volatile <2 x half> zeroinitializer, <2 x half> addrspace(5)* %arraydecay{{[0-9]+}}, align 4
// CHECK: store volatile <4 x half> <half 0xH0000, half 0xH0000, half 0xH0000, half undef>, <4 x half> addrspace(5)* %storetmp{{[0-9]+}}, align 8
// CHECK: store volatile <4 x half> zeroinitializer, <4 x half> addrspace(5)* %arraydecay{{[0-9]+}}, align 8
// CHECK: store volatile <8 x half> zeroinitializer, <8 x half> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
// CHECK: store volatile <16 x half> zeroinitializer, <16 x half> addrspace(5)* %arraydecay{{[0-9]+}}, align 32
// CHECK: store volatile float 0.000000e+00, float addrspace(5)* %arraydecay{{[0-9]+}}, align 4
// CHECK: store volatile <2 x float> zeroinitializer, <2 x float> addrspace(5)* %arraydecay{{[0-9]+}}, align 8
// CHECK: store volatile <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, <4 x float> addrspace(5)* %storetmp{{[0-9]+}}, align 16
// CHECK: store volatile <4 x float> zeroinitializer, <4 x float> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
// CHECK: store volatile <8 x float> zeroinitializer, <8 x float> addrspace(5)* %arraydecay{{[0-9]+}}, align 32
// CHECK: store volatile <16 x float> zeroinitializer, <16 x float> addrspace(5)* %arraydecay{{[0-9]+}}, align 64
// CHECK: store volatile double 0.000000e+00, double addrspace(5)* %arraydecay{{[0-9]+}}, align 8
// CHECK: store volatile <2 x double> zeroinitializer, <2 x double> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
// CHECK: store volatile <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double undef>, <4 x double> addrspace(5)* %storetmp{{[0-9]+}}, align 32
// CHECK: store volatile <4 x double> zeroinitializer, <4 x double> addrspace(5)* %arraydecay{{[0-9]+}}, align 32
// CHECK: store volatile <8 x double> zeroinitializer, <8 x double> addrspace(5)* %arraydecay{{[0-9]+}}, align 64
// CHECK: store volatile <16 x double> zeroinitializer, <16 x double> addrspace(5)* %arraydecay{{[0-9]+}}, align 128
kernel void private_memory_alignment_alloca()
{
  volatile private char private_i8[4];
  volatile private char2 private_v2i8[4];
  volatile private char3 private_v3i8[4];
  volatile private char4 private_v4i8[4];
  volatile private char8 private_v8i8[4];
  volatile private char16 private_v16i8[4];

  volatile private short private_i16[4];
  volatile private short2 private_v2i16[4];
  volatile private short3 private_v3i16[4];
  volatile private short4 private_v4i16[4];
  volatile private short8 private_v8i16[4];
  volatile private short16 private_v16i16[4];

  volatile private int private_i32[4];
  volatile private int2 private_v2i32[4];
  volatile private int3 private_v3i32[4];
  volatile private int4 private_v4i32[4];
  volatile private int8 private_v8i32[4];
  volatile private int16 private_v16i32[4];

  volatile private long private_i64[4];
  volatile private long2 private_v2i64[4];
  volatile private long3 private_v3i64[4];
  volatile private long4 private_v4i64[4];
  volatile private long8 private_v8i64[4];
  volatile private long16 private_v16i64[4];

  volatile private half private_f16[4];
  volatile private half2 private_v2f16[4];
  volatile private half3 private_v3f16[4];
  volatile private half4 private_v4f16[4];
  volatile private half8 private_v8f16[4];
  volatile private half16 private_v16f16[4];

  volatile private float private_f32[4];
  volatile private float2 private_v2f32[4];
  volatile private float3 private_v3f32[4];
  volatile private float4 private_v4f32[4];
  volatile private float8 private_v8f32[4];
  volatile private float16 private_v16f32[4];

  volatile private double private_f64[4];
  volatile private double2 private_v2f64[4];
  volatile private double3 private_v3f64[4];
  volatile private double4 private_v4f64[4];
  volatile private double8 private_v8f64[4];
  volatile private double16 private_v16f64[4];

  // One volatile store per array; the order must stay in sync with the
  // expected store sequence listed above this kernel.
  *private_i8 = 0;
  *private_v2i8 = 0;
  *private_v3i8 = 0;
  *private_v4i8 = 0;
  *private_v8i8 = 0;
  *private_v16i8 = 0;

  *private_i16 = 0;
  *private_v2i16 = 0;
  *private_v3i16 = 0;
  *private_v4i16 = 0;
  *private_v8i16 = 0;
  *private_v16i16 = 0;

  *private_i32 = 0;
  *private_v2i32 = 0;
  *private_v3i32 = 0;
  *private_v4i32 = 0;
  *private_v8i32 = 0;
  *private_v16i32 = 0;

  *private_i64 = 0;
  *private_v2i64 = 0;
  *private_v3i64 = 0;
  *private_v4i64 = 0;
  *private_v8i64 = 0;
  *private_v16i64 = 0;

  *private_f16 = 0;
  *private_v2f16 = 0;
  *private_v3f16 = 0;
  *private_v4f16 = 0;
  *private_v8f16 = 0;
  *private_v16f16 = 0;

  *private_f32 = 0;
  *private_v2f32 = 0;
  *private_v3f32 = 0;
  *private_v4f32 = 0;
  *private_v8f32 = 0;
  *private_v16f32 = 0;

  *private_f64 = 0;
  *private_v2f64 = 0;
  *private_v3f64 = 0;
  *private_v4f64 = 0;
  *private_v8f64 = 0;
  *private_v16f64 = 0;
}