; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; The bitcasts of constant vectors should be folded so the vectors can be
; broken down and the shared components can be CSEd.
;
; Each of the first four kernels performs two volatile 256-bit stores of
; constants that differ only in their last element. The checks expect four
; dwordx4 stores, with no v_mov_b32 between the second and third store or
; between the third and fourth store.

; Source constant: <8 x i32>, stored as <8 x float>.
; GCN-LABEL: {{^}}store_bitcast_constant_v8i32_to_v8f32:
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN-NOT: v_mov_b32
; GCN: buffer_store_dwordx4
; GCN-NOT: v_mov_b32
; GCN: buffer_store_dwordx4
define amdgpu_kernel void @store_bitcast_constant_v8i32_to_v8f32(<8 x float> addrspace(1)* %out, <8 x i32> %vec) {
  %vec0.bc = bitcast <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8> to <8 x float>
  store volatile <8 x float> %vec0.bc, <8 x float> addrspace(1)* %out

  %vec1.bc = bitcast <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 9> to <8 x float>
  store volatile <8 x float> %vec1.bc, <8 x float> addrspace(1)* %out
  ret void
}

; Source constant: <4 x i64>, stored as <8 x float> (element width changes).
; GCN-LABEL: {{^}}store_bitcast_constant_v4i64_to_v8f32:
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN-NOT: v_mov_b32
; GCN: buffer_store_dwordx4
; GCN-NOT: v_mov_b32
; GCN: buffer_store_dwordx4
define amdgpu_kernel void @store_bitcast_constant_v4i64_to_v8f32(<8 x float> addrspace(1)* %out, <4 x i64> %vec) {
  %vec0.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 8> to <8 x float>
  store volatile <8 x float> %vec0.bc, <8 x float> addrspace(1)* %out

  %vec1.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 9> to <8 x float>
  store volatile <8 x float> %vec1.bc, <8 x float> addrspace(1)* %out
  ret void
}

; Source constant: <4 x i64>, stored as <4 x double> (same element width).
; GCN-LABEL: {{^}}store_bitcast_constant_v4i64_to_v4f64:
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN-NOT: v_mov_b32
; GCN: buffer_store_dwordx4
; GCN-NOT: v_mov_b32
; GCN: buffer_store_dwordx4
define amdgpu_kernel void @store_bitcast_constant_v4i64_to_v4f64(<4 x double> addrspace(1)* %out, <4 x i64> %vec) {
  %vec0.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 8> to <4 x double>
  store volatile <4 x double> %vec0.bc, <4 x double> addrspace(1)* %out

  %vec1.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 9> to <4 x double>
  store volatile <4 x double> %vec1.bc, <4 x double> addrspace(1)* %out
  ret void
}

; NOTE: despite its name, this kernel bitcasts a <16 x i16> constant to
; <8 x float>; no <8 x i32> value is involved. The name is kept as-is because
; the label check below matches on it.
; GCN-LABEL: {{^}}store_bitcast_constant_v8i32_to_v16i16:
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN-NOT: v_mov_b32
; GCN: buffer_store_dwordx4
; GCN-NOT: v_mov_b32
; GCN: buffer_store_dwordx4
define amdgpu_kernel void @store_bitcast_constant_v8i32_to_v16i16(<8 x float> addrspace(1)* %out, <16 x i16> %vec) {
  %vec0.bc = bitcast <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 8> to <8 x float>
  store volatile <8 x float> %vec0.bc, <8 x float> addrspace(1)* %out

  %vec1.bc = bitcast <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 9> to <8 x float>
  store volatile <8 x float> %vec1.bc, <8 x float> addrspace(1)* %out
  ret void
}

; The i64 result of the icmp intrinsic is bitcast to <2 x i32> and stored.
; The condition-code operand is 999; going by the test and value names, the
; call is expected to be lowered to undef (presumably because 999 is not a
; valid condition code -- confirm against the intrinsic's lowering). The
; check expects no store_dword in the output.
; GCN-LABEL: {{^}}store_value_lowered_to_undef_bitcast_source:
; GCN-NOT: store_dword
define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source(<2 x i32> addrspace(1)* %out, i64 %a, i64 %b) #0 {
  %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 999) #1
  %bc = bitcast i64 %undef to <2 x i32>
  store volatile <2 x i32> %bc, <2 x i32> addrspace(1)* %out
  ret void
}

; Same pattern as the previous kernel, but with condition code 9999 and only
; element 1 of the bitcast <2 x i32> value is extracted and stored. The check
; again expects no store_dword in the output.
; GCN-LABEL: {{^}}store_value_lowered_to_undef_bitcast_source_extractelt:
; GCN-NOT: store_dword
define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source_extractelt(i32 addrspace(1)* %out, i64 %a, i64 %b) #0 {
  %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 9999) #1
  %bc = bitcast i64 %undef to <2 x i32>
  %elt1 = extractelement <2 x i32> %bc, i32 1
  store volatile i32 %elt1, i32 addrspace(1)* %out
  ret void
}

declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone convergent }