; Code generation tests for storing <3 x i64> vectors, and for truncating
; stores that start from <3 x i64>, on the amdgcn target.
;
; llc is invoked three times: once with no explicit CPU, once for bonaire and
; once for tonga with the flat-for-global feature disabled. All three share one
; common check prefix; each invocation also enables its own secondary prefix,
; which no directive in this file currently uses.
;
; NOTE(review): FileCheck is given -allow-deprecated-dag-overlap, so adjacent
; order-independent directives in this file are allowed to match the same
; output line.

; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=CI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=VI %s

; 32-byte-aligned store of <3 x i64> through an addrspace(1) pointer.
; Expected, in either order: one dwordx4 buffer store with no offset and one
; dwordx2 buffer store at byte offset 16.
; GCN-LABEL: {{^}}global_store_v3i64:
; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
define amdgpu_kernel void @global_store_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> %x) {
  store <3 x i64> %x, <3 x i64> addrspace(1)* %out, align 32
  ret void
}

; Same store as above but with align 1. The checks expect 24 single-byte
; buffer stores in sequence (six groups of four), i.e. one per byte of the
; three i64 elements.
; GCN-LABEL: {{^}}global_store_v3i64_unaligned:
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte

; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte

; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte

; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte

; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte

; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
; GCN: buffer_store_byte
define amdgpu_kernel void @global_store_v3i64_unaligned(<3 x i64> addrspace(1)* %out, <3 x i64> %x) {
  store <3 x i64> %x, <3 x i64> addrspace(1)* %out, align 1
  ret void
}

; 32-byte-aligned store of <3 x i64> through an addrspace(3) pointer.
; Expected, in this order: a ds_write2_b64 followed by a ds_write_b64.
; GCN-LABEL: {{^}}local_store_v3i64:
; GCN: ds_write2_b64
; GCN: ds_write_b64
define amdgpu_kernel void @local_store_v3i64(<3 x i64> addrspace(3)* %out, <3 x i64> %x) {
  store <3 x i64> %x, <3 x i64> addrspace(3)* %out, align 32
  ret void
}

; addrspace(3) store with align 1. The checks expect 24 ds_write_b8
; instructions in sequence (six groups of four).
; GCN-LABEL: {{^}}local_store_v3i64_unaligned:
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8

; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8

; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8

; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8

; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8

; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
define amdgpu_kernel void @local_store_v3i64_unaligned(<3 x i64> addrspace(3)* %out, <3 x i64> %x) {
  store <3 x i64> %x, <3 x i64> addrspace(3)* %out, align 1
  ret void
}

; Truncate <3 x i64> to <3 x i32> and store it through an addrspace(1)
; pointer (no explicit alignment on the store). Expected, in either order:
; one dwordx2 buffer store and one dword buffer store.
; GCN-LABEL: {{^}}global_truncstore_v3i64_to_v3i32:
; GCN-DAG: buffer_store_dwordx2
; GCN-DAG: buffer_store_dword v
define amdgpu_kernel void @global_truncstore_v3i64_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i64> %x) {
  %trunc = trunc <3 x i64> %x to <3 x i32>
  store <3 x i32> %trunc, <3 x i32> addrspace(1)* %out
  ret void
}

; Truncate <3 x i64> to <3 x i16> and store it. Expected, in either order:
; one short buffer store and one dword buffer store.
; GCN-LABEL: {{^}}global_truncstore_v3i64_to_v3i16:
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_dword v
define amdgpu_kernel void @global_truncstore_v3i64_to_v3i16(<3 x i16> addrspace(1)* %out, <3 x i64> %x) {
  %trunc = trunc <3 x i64> %x to <3 x i16>
  store <3 x i16> %trunc, <3 x i16> addrspace(1)* %out
  ret void
}


; Truncate <3 x i64> to <3 x i8> and store it. Expected, in either order:
; one short buffer store and one byte buffer store.
; GCN-LABEL: {{^}}global_truncstore_v3i64_to_v3i8:
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_byte v
define amdgpu_kernel void @global_truncstore_v3i64_to_v3i8(<3 x i8> addrspace(1)* %out, <3 x i64> %x) {
  %trunc = trunc <3 x i64> %x to <3 x i8>
  store <3 x i8> %trunc, <3 x i8> addrspace(1)* %out
  ret void
}

; Truncate <3 x i64> to <3 x i1> and store it. Three byte buffer stores are
; listed as order-independent directives.
; NOTE(review): with DAG overlap allowed (see the FileCheck options above),
; these three identical directives can presumably all be satisfied by a single
; byte store, so the count of three may not actually be enforced - confirm
; against the real llc output before tightening.
; GCN-LABEL: {{^}}global_truncstore_v3i64_to_v3i1:
; GCN-DAG: buffer_store_byte v
; GCN-DAG: buffer_store_byte v
; GCN-DAG: buffer_store_byte v
define amdgpu_kernel void @global_truncstore_v3i64_to_v3i1(<3 x i1> addrspace(1)* %out, <3 x i64> %x) {
  %trunc = trunc <3 x i64> %x to <3 x i1>
  store <3 x i1> %trunc, <3 x i1> addrspace(1)* %out
  ret void
}