; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6 %s

; Each kernel below stores its <3 x i32> argument through an addrspace(3)
; pointer argument; the kernels differ only in the alignment written on the
; store. The run lines above compile the file for gfx900, hawaii and tahiti,
; and the assertions pin the exact ds_write sequence expected from llc for
; each of those targets.
; NOTE(review): the GCN prefix is passed to FileCheck on every run line, but no
; assertion in this file uses it - confirm whether it can be dropped.

; No explicit alignment on the store. gfx900 and hawaii are expected to emit a
; single ds_write_b96; tahiti is expected to emit a ds_write_b32 at offset 8
; followed by a ds_write_b64.
define amdgpu_kernel void @store_lds_v3i32(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
; GFX9-LABEL: store_lds_v3i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x24
; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x34
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v3, s4
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    v_mov_b32_e32 v2, s2
; GFX9-NEXT:    ds_write_b96 v3, v[0:2]
; GFX9-NEXT:    s_endpgm
;
; GFX7-LABEL: store_lds_v3i32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x9
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v3, s4
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    v_mov_b32_e32 v2, s2
; GFX7-NEXT:    ds_write_b96 v3, v[0:2]
; GFX7-NEXT:    s_endpgm
;
; GFX6-LABEL: store_lds_v3i32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v2, s4
; GFX6-NEXT:    v_mov_b32_e32 v1, s2
; GFX6-NEXT:    ds_write_b32 v2, v1 offset:8
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    v_mov_b32_e32 v1, s1
; GFX6-NEXT:    ds_write_b64 v2, v[0:1]
; GFX6-NEXT:    s_endpgm
  store <3 x i32> %x, <3 x i32> addrspace(3)* %out
  ret void
}

; Store with align 1. All three targets are expected to emit twelve byte-sized
; stores. gfx900 writes the bytes at offsets 2, 6 and 10 with
; ds_write_b8_d16_hi; hawaii and tahiti use s_lshr_b32 plus ds_write_b8 for
; every byte above the lowest one of each dword.
define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
; GFX9-LABEL: store_lds_v3i32_align1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x24
; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x34
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s4
; GFX9-NEXT:    v_mov_b32_e32 v1, s2
; GFX9-NEXT:    v_mov_b32_e32 v2, s1
; GFX9-NEXT:    ds_write_b8 v0, v1 offset:8
; GFX9-NEXT:    ds_write_b8_d16_hi v0, v1 offset:10
; GFX9-NEXT:    ds_write_b8 v0, v2 offset:4
; GFX9-NEXT:    ds_write_b8_d16_hi v0, v2 offset:6
; GFX9-NEXT:    v_mov_b32_e32 v1, s0
; GFX9-NEXT:    s_lshr_b32 s3, s2, 8
; GFX9-NEXT:    ds_write_b8 v0, v1
; GFX9-NEXT:    ds_write_b8_d16_hi v0, v1 offset:2
; GFX9-NEXT:    v_mov_b32_e32 v1, s3
; GFX9-NEXT:    s_lshr_b32 s2, s2, 24
; GFX9-NEXT:    ds_write_b8 v0, v1 offset:9
; GFX9-NEXT:    v_mov_b32_e32 v1, s2
; GFX9-NEXT:    s_lshr_b32 s2, s1, 8
; GFX9-NEXT:    ds_write_b8 v0, v1 offset:11
; GFX9-NEXT:    v_mov_b32_e32 v1, s2
; GFX9-NEXT:    s_lshr_b32 s1, s1, 24
; GFX9-NEXT:    ds_write_b8 v0, v1 offset:5
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    s_lshr_b32 s1, s0, 8
; GFX9-NEXT:    ds_write_b8 v0, v1 offset:7
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    s_lshr_b32 s0, s0, 24
; GFX9-NEXT:    ds_write_b8 v0, v1 offset:1
; GFX9-NEXT:    v_mov_b32_e32 v1, s0
; GFX9-NEXT:    ds_write_b8 v0, v1 offset:3
; GFX9-NEXT:    s_endpgm
;
; GFX7-LABEL: store_lds_v3i32_align1:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x9
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s2
; GFX7-NEXT:    v_mov_b32_e32 v2, s1
; GFX7-NEXT:    ds_write_b8 v0, v1 offset:8
; GFX7-NEXT:    ds_write_b8 v0, v2 offset:4
; GFX7-NEXT:    v_mov_b32_e32 v1, s0
; GFX7-NEXT:    s_lshr_b32 s3, s2, 8
; GFX7-NEXT:    ds_write_b8 v0, v1
; GFX7-NEXT:    v_mov_b32_e32 v1, s3
; GFX7-NEXT:    s_lshr_b32 s3, s2, 24
; GFX7-NEXT:    ds_write_b8 v0, v1 offset:9
; GFX7-NEXT:    v_mov_b32_e32 v1, s3
; GFX7-NEXT:    s_lshr_b32 s2, s2, 16
; GFX7-NEXT:    ds_write_b8 v0, v1 offset:11
; GFX7-NEXT:    v_mov_b32_e32 v1, s2
; GFX7-NEXT:    s_lshr_b32 s2, s1, 8
; GFX7-NEXT:    ds_write_b8 v0, v1 offset:10
; GFX7-NEXT:    v_mov_b32_e32 v1, s2
; GFX7-NEXT:    s_lshr_b32 s2, s1, 24
; GFX7-NEXT:    ds_write_b8 v0, v1 offset:5
; GFX7-NEXT:    v_mov_b32_e32 v1, s2
; GFX7-NEXT:    s_lshr_b32 s1, s1, 16
; GFX7-NEXT:    ds_write_b8 v0, v1 offset:7
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    s_lshr_b32 s1, s0, 8
; GFX7-NEXT:    ds_write_b8 v0, v1 offset:6
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    s_lshr_b32 s1, s0, 24
; GFX7-NEXT:    ds_write_b8 v0, v1 offset:1
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    s_lshr_b32 s0, s0, 16
; GFX7-NEXT:    ds_write_b8 v0, v1 offset:3
; GFX7-NEXT:    v_mov_b32_e32 v1, s0
; GFX7-NEXT:    ds_write_b8 v0, v1 offset:2
; GFX7-NEXT:    s_endpgm
;
; GFX6-LABEL: store_lds_v3i32_align1:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    v_mov_b32_e32 v1, s2
; GFX6-NEXT:    v_mov_b32_e32 v2, s1
; GFX6-NEXT:    ds_write_b8 v0, v1 offset:8
; GFX6-NEXT:    ds_write_b8 v0, v2 offset:4
; GFX6-NEXT:    v_mov_b32_e32 v1, s0
; GFX6-NEXT:    s_lshr_b32 s3, s2, 8
; GFX6-NEXT:    ds_write_b8 v0, v1
; GFX6-NEXT:    v_mov_b32_e32 v1, s3
; GFX6-NEXT:    s_lshr_b32 s3, s2, 24
; GFX6-NEXT:    ds_write_b8 v0, v1 offset:9
; GFX6-NEXT:    v_mov_b32_e32 v1, s3
; GFX6-NEXT:    s_lshr_b32 s2, s2, 16
; GFX6-NEXT:    ds_write_b8 v0, v1 offset:11
; GFX6-NEXT:    v_mov_b32_e32 v1, s2
; GFX6-NEXT:    s_lshr_b32 s2, s1, 8
; GFX6-NEXT:    ds_write_b8 v0, v1 offset:10
; GFX6-NEXT:    v_mov_b32_e32 v1, s2
; GFX6-NEXT:    s_lshr_b32 s2, s1, 24
; GFX6-NEXT:    ds_write_b8 v0, v1 offset:5
; GFX6-NEXT:    v_mov_b32_e32 v1, s2
; GFX6-NEXT:    s_lshr_b32 s1, s1, 16
; GFX6-NEXT:    ds_write_b8 v0, v1 offset:7
; GFX6-NEXT:    v_mov_b32_e32 v1, s1
; GFX6-NEXT:    s_lshr_b32 s1, s0, 8
; GFX6-NEXT:    ds_write_b8 v0, v1 offset:6
; GFX6-NEXT:    v_mov_b32_e32 v1, s1
; GFX6-NEXT:    s_lshr_b32 s1, s0, 24
; GFX6-NEXT:    ds_write_b8 v0, v1 offset:1
; GFX6-NEXT:    v_mov_b32_e32 v1, s1
; GFX6-NEXT:    s_lshr_b32 s0, s0, 16
; GFX6-NEXT:    ds_write_b8 v0, v1 offset:3
; GFX6-NEXT:    v_mov_b32_e32 v1, s0
; GFX6-NEXT:    ds_write_b8 v0, v1 offset:2
; GFX6-NEXT:    s_endpgm
  store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 1
  ret void
}

; Store with align 2. All three targets are expected to emit six 16-bit stores.
; gfx900 writes the upper halves (offsets 2, 6 and 10) with
; ds_write_b16_d16_hi; hawaii and tahiti shift right by 16 with s_lshr_b32 and
; use ds_write_b16.
define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
; GFX9-LABEL: store_lds_v3i32_align2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x24
; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x34
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s4
; GFX9-NEXT:    v_mov_b32_e32 v1, s2
; GFX9-NEXT:    v_mov_b32_e32 v2, s1
; GFX9-NEXT:    ds_write_b16 v0, v1 offset:8
; GFX9-NEXT:    ds_write_b16_d16_hi v0, v1 offset:10
; GFX9-NEXT:    ds_write_b16 v0, v2 offset:4
; GFX9-NEXT:    ds_write_b16_d16_hi v0, v2 offset:6
; GFX9-NEXT:    v_mov_b32_e32 v1, s0
; GFX9-NEXT:    ds_write_b16 v0, v1
; GFX9-NEXT:    ds_write_b16_d16_hi v0, v1 offset:2
; GFX9-NEXT:    s_endpgm
;
; GFX7-LABEL: store_lds_v3i32_align2:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x9
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s2
; GFX7-NEXT:    v_mov_b32_e32 v2, s1
; GFX7-NEXT:    ds_write_b16 v0, v1 offset:8
; GFX7-NEXT:    ds_write_b16 v0, v2 offset:4
; GFX7-NEXT:    v_mov_b32_e32 v1, s0
; GFX7-NEXT:    s_lshr_b32 s2, s2, 16
; GFX7-NEXT:    ds_write_b16 v0, v1
; GFX7-NEXT:    v_mov_b32_e32 v1, s2
; GFX7-NEXT:    s_lshr_b32 s1, s1, 16
; GFX7-NEXT:    ds_write_b16 v0, v1 offset:10
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    s_lshr_b32 s0, s0, 16
; GFX7-NEXT:    ds_write_b16 v0, v1 offset:6
; GFX7-NEXT:    v_mov_b32_e32 v1, s0
; GFX7-NEXT:    ds_write_b16 v0, v1 offset:2
; GFX7-NEXT:    s_endpgm
;
; GFX6-LABEL: store_lds_v3i32_align2:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    v_mov_b32_e32 v1, s2
; GFX6-NEXT:    v_mov_b32_e32 v2, s1
; GFX6-NEXT:    ds_write_b16 v0, v1 offset:8
; GFX6-NEXT:    ds_write_b16 v0, v2 offset:4
; GFX6-NEXT:    v_mov_b32_e32 v1, s0
; GFX6-NEXT:    s_lshr_b32 s2, s2, 16
; GFX6-NEXT:    ds_write_b16 v0, v1
; GFX6-NEXT:    v_mov_b32_e32 v1, s2
; GFX6-NEXT:    s_lshr_b32 s1, s1, 16
; GFX6-NEXT:    ds_write_b16 v0, v1 offset:10
; GFX6-NEXT:    v_mov_b32_e32 v1, s1
; GFX6-NEXT:    s_lshr_b32 s0, s0, 16
; GFX6-NEXT:    ds_write_b16 v0, v1 offset:6
; GFX6-NEXT:    v_mov_b32_e32 v1, s0
; GFX6-NEXT:    ds_write_b16 v0, v1 offset:2
; GFX6-NEXT:    s_endpgm
  store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 2
  ret void
}

; Store with align 4. All three targets are expected to emit a ds_write2_b32
; (offset1:1) for the first two dwords and a ds_write_b32 at offset 8 for the
; third.
define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
; GFX9-LABEL: store_lds_v3i32_align4:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x24
; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x34
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s4
; GFX9-NEXT:    v_mov_b32_e32 v1, s0
; GFX9-NEXT:    v_mov_b32_e32 v2, s1
; GFX9-NEXT:    v_mov_b32_e32 v3, s2
; GFX9-NEXT:    ds_write2_b32 v0, v1, v2 offset1:1
; GFX9-NEXT:    ds_write_b32 v0, v3 offset:8
; GFX9-NEXT:    s_endpgm
;
; GFX7-LABEL: store_lds_v3i32_align4:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x9
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s0
; GFX7-NEXT:    v_mov_b32_e32 v2, s1
; GFX7-NEXT:    ds_write2_b32 v0, v1, v2 offset1:1
; GFX7-NEXT:    v_mov_b32_e32 v1, s2
; GFX7-NEXT:    ds_write_b32 v0, v1 offset:8
; GFX7-NEXT:    s_endpgm
;
; GFX6-LABEL: store_lds_v3i32_align4:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    v_mov_b32_e32 v1, s1
; GFX6-NEXT:    v_mov_b32_e32 v2, s0
; GFX6-NEXT:    ds_write2_b32 v0, v2, v1 offset1:1
; GFX6-NEXT:    v_mov_b32_e32 v1, s2
; GFX6-NEXT:    ds_write_b32 v0, v1 offset:8
; GFX6-NEXT:    s_endpgm
  store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 4
  ret void
}

; Store with align 8. All three targets are expected to emit a ds_write_b32 at
; offset 8 for the third dword and a ds_write_b64 for the first two.
define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
; GFX9-LABEL: store_lds_v3i32_align8:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x24
; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x34
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v2, s4
; GFX9-NEXT:    v_mov_b32_e32 v3, s2
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    ds_write_b32 v2, v3 offset:8
; GFX9-NEXT:    ds_write_b64 v2, v[0:1]
; GFX9-NEXT:    s_endpgm
;
; GFX7-LABEL: store_lds_v3i32_align8:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x9
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v2, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s2
; GFX7-NEXT:    ds_write_b32 v2, v1 offset:8
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    ds_write_b64 v2, v[0:1]
; GFX7-NEXT:    s_endpgm
;
; GFX6-LABEL: store_lds_v3i32_align8:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v2, s4
; GFX6-NEXT:    v_mov_b32_e32 v1, s2
; GFX6-NEXT:    ds_write_b32 v2, v1 offset:8
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    v_mov_b32_e32 v1, s1
; GFX6-NEXT:    ds_write_b64 v2, v[0:1]
; GFX6-NEXT:    s_endpgm
  store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 8
  ret void
}

; Store with align 16. gfx900 and hawaii are expected to emit a single
; ds_write_b96; tahiti is expected to emit a ds_write_b32 at offset 8 followed
; by a ds_write_b64.
define amdgpu_kernel void @store_lds_v3i32_align16(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
; GFX9-LABEL: store_lds_v3i32_align16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x24
; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x34
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v3, s4
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    v_mov_b32_e32 v2, s2
; GFX9-NEXT:    ds_write_b96 v3, v[0:2]
; GFX9-NEXT:    s_endpgm
;
; GFX7-LABEL: store_lds_v3i32_align16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x9
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v3, s4
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    v_mov_b32_e32 v2, s2
; GFX7-NEXT:    ds_write_b96 v3, v[0:2]
; GFX7-NEXT:    s_endpgm
;
; GFX6-LABEL: store_lds_v3i32_align16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
; GFX6-NEXT:    s_mov_b32 m0, -1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v2, s4
; GFX6-NEXT:    v_mov_b32_e32 v1, s2
; GFX6-NEXT:    ds_write_b32 v2, v1 offset:8
; GFX6-NEXT:    v_mov_b32_e32 v0, s0
; GFX6-NEXT:    v_mov_b32_e32 v1, s1
; GFX6-NEXT:    ds_write_b64 v2, v[0:1]
; GFX6-NEXT:    s_endpgm
  store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 16
  ret void
}