; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s

; Codegen checks for i32 scalar and vector loads from addrspace(4), and for
; zero/sign extension of the loaded value to i64. Every kernel stores its
; result through an addrspace(1) pointer.
;
; Check prefixes enabled by the command lines above:
;   FUNC       every invocation
;   GCN        every amdgcn invocation
;   GCN-NOHSA  amdgcn invocations that do not use the amdhsa triple
;   GCN-HSA    amdgcn--amdhsa invocations
;   EG         the r600 / redwood invocation
;
; Keep comments in this file free of any prefix name immediately followed by
; a colon, otherwise FileCheck will treat the comment as a directive.

; ---------------------------------------------------------------------------
; Plain loads, one kernel per vector width.
; ---------------------------------------------------------------------------

; FUNC-LABEL: {{^}}constant_load_i32:
; GCN: s_load_dword s{{[0-9]+}}

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
define amdgpu_kernel void @constant_load_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %in) #0 {
entry:
  %ld = load i32, i32 addrspace(4)* %in
  store i32 %ld, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_load_v2i32:
; GCN: s_load_dwordx2

; EG: VTX_READ_64
define amdgpu_kernel void @constant_load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(4)* %in) #0 {
entry:
  %ld = load <2 x i32>, <2 x i32> addrspace(4)* %in
  store <2 x i32> %ld, <2 x i32> addrspace(1)* %out
  ret void
}

; The three-element vector is expected to use the same four-dword load as the
; four-element case below.
; FUNC-LABEL: {{^}}constant_load_v3i32:
; GCN: s_load_dwordx4

; EG: VTX_READ_128
define amdgpu_kernel void @constant_load_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(4)* %in) #0 {
entry:
  %ld = load <3 x i32>, <3 x i32> addrspace(4)* %in
  store <3 x i32> %ld, <3 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_load_v4i32:
; GCN: s_load_dwordx4

; EG: VTX_READ_128
define amdgpu_kernel void @constant_load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(4)* %in) #0 {
entry:
  %ld = load <4 x i32>, <4 x i32> addrspace(4)* %in
  store <4 x i32> %ld, <4 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_load_v8i32:
; GCN: s_load_dwordx8

; EG: VTX_READ_128
; EG: VTX_READ_128
define amdgpu_kernel void @constant_load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(4)* %in) #0 {
entry:
  %ld = load <8 x i32>, <8 x i32> addrspace(4)* %in
  store <8 x i32> %ld, <8 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_load_v16i32:
; GCN: s_load_dwordx16

; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_128
define amdgpu_kernel void @constant_load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(4)* %in) #0 {
entry:
  %ld = load <16 x i32>, <16 x i32> addrspace(4)* %in
  store <16 x i32> %ld, <16 x i32> addrspace(1)* %out
  ret void
}

; ---------------------------------------------------------------------------
; Scalar i32 -> i64 extending loads.
; ---------------------------------------------------------------------------

; Zero extension: the high half is expected to be a literal 0 moved into a
; VGPR, followed by a single 64-bit store.
; FUNC-LABEL: {{^}}constant_zextload_i32_to_i64:
; GCN-DAG: s_load_dword s[[SLO:[0-9]+]],
; GCN-DAG: v_mov_b32_e32 v[[SHI:[0-9]+]], 0{{$}}
; GCN: store_dwordx2

; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
; EG: CF_END
; EG: VTX_READ_32
define amdgpu_kernel void @constant_zextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(4)* %in) #0 {
  %ld = load i32, i32 addrspace(4)* %in
  %ext = zext i32 %ld to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; Sign extension: the high half is expected to come from an arithmetic shift
; right by 31 of the loaded low half.
; FUNC-LABEL: {{^}}constant_sextload_i32_to_i64:
; GCN: s_load_dword s[[SLO:[0-9]+]]
; GCN: s_ashr_i32 s[[HI:[0-9]+]], s[[SLO]], 31
; GCN: store_dwordx2

; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
; EG: CF_END
; EG: VTX_READ_32
; EG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.
; EG: 31
define amdgpu_kernel void @constant_sextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(4)* %in) #0 {
  %ld = load i32, i32 addrspace(4)* %in
  %ext = sext i32 %ld to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; ---------------------------------------------------------------------------
; Vector i32 -> i64 extending loads (amdgcn checks only).
; ---------------------------------------------------------------------------

; FUNC-LABEL: {{^}}constant_zextload_v1i32_to_v1i64:
; GCN: s_load_dword
; GCN: store_dwordx2
define amdgpu_kernel void @constant_zextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(4)* %in) #0 {
  %ld = load <1 x i32>, <1 x i32> addrspace(4)* %in
  %ext = zext <1 x i32> %ld to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v1i32_to_v1i64:
; GCN: s_load_dword s[[LO:[0-9]+]]
; GCN: s_ashr_i32 s[[HI:[0-9]+]], s[[LO]], 31
; GCN: store_dwordx2
define amdgpu_kernel void @constant_sextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(4)* %in) #0 {
  %ld = load <1 x i32>, <1 x i32> addrspace(4)* %in
  %ext = sext <1 x i32> %ld to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v2i32_to_v2i64:
; GCN: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
; GCN: store_dwordx4
define amdgpu_kernel void @constant_zextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(4)* %in) #0 {
  %ld = load <2 x i32>, <2 x i32> addrspace(4)* %in
  %ext = zext <2 x i32> %ld to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v2i32_to_v2i64:
; GCN: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}}

; GCN-DAG: s_ashr_i32
; GCN-DAG: s_ashr_i32

; GCN: store_dwordx4
define amdgpu_kernel void @constant_sextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(4)* %in) #0 {
  %ld = load <2 x i32>, <2 x i32> addrspace(4)* %in
  %ext = sext <2 x i32> %ld to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v4i32_to_v4i64:
; GCN: s_load_dwordx4

; GCN: store_dwordx4
; GCN: store_dwordx4
define amdgpu_kernel void @constant_zextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(4)* %in) #0 {
  %ld = load <4 x i32>, <4 x i32> addrspace(4)* %in
  %ext = zext <4 x i32> %ld to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; One shift per element is expected for the sign-extended variant.
; FUNC-LABEL: {{^}}constant_sextload_v4i32_to_v4i64:
; GCN: s_load_dwordx4

; GCN: s_ashr_i32
; GCN: s_ashr_i32
; GCN: s_ashr_i32
; GCN: s_ashr_i32

; GCN: store_dwordx4
; GCN: store_dwordx4
define amdgpu_kernel void @constant_sextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(4)* %in) #0 {
  %ld = load <4 x i32>, <4 x i32> addrspace(4)* %in
  %ext = sext <4 x i32> %ld to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; The <8 x i64> result is 64 bytes, i.e. four dwordx4 stores. Both the
; non-HSA (buffer) and HSA (flat or global) flavours check all four.
; FUNC-LABEL: {{^}}constant_zextload_v8i32_to_v8i64:
; GCN: s_load_dwordx8

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @constant_zextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(4)* %in) #0 {
  %ld = load <8 x i32>, <8 x i32> addrspace(4)* %in
  %ext = zext <8 x i32> %ld to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v8i32_to_v8i64:
; GCN: s_load_dwordx8

; GCN: s_ashr_i32
; GCN: s_ashr_i32
; GCN: s_ashr_i32
; GCN: s_ashr_i32
; GCN: s_ashr_i32
; GCN: s_ashr_i32
; GCN: s_ashr_i32
; GCN: s_ashr_i32

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @constant_sextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(4)* %in) #0 {
  %ld = load <8 x i32>, <8 x i32> addrspace(4)* %in
  %ext = sext <8 x i32> %ld to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; Only the presence of at least one shift is checked for the 16-element
; sign-extended variant; the eight stores are checked in order.
; FUNC-LABEL: {{^}}constant_sextload_v16i32_to_v16i64:
; GCN: s_load_dwordx16


; GCN-DAG: s_ashr_i32

; GCN: store_dwordx4
; GCN: store_dwordx4
; GCN: store_dwordx4
; GCN: store_dwordx4
; GCN: store_dwordx4
; GCN: store_dwordx4
; GCN: store_dwordx4
; GCN: store_dwordx4
define amdgpu_kernel void @constant_sextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(4)* %in) #0 {
  %ld = load <16 x i32>, <16 x i32> addrspace(4)* %in
  %ext = sext <16 x i32> %ld to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v16i32_to_v16i64:
; GCN: s_load_dwordx16

; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4

; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @constant_zextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(4)* %in) #0 {
  %ld = load <16 x i32>, <16 x i32> addrspace(4)* %in
  %ext = zext <16 x i32> %ld to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; The 32-element input is expected to be fetched with two 16-dword loads; the
; <32 x i64> result is written with sixteen dwordx4 stores.
; FUNC-LABEL: {{^}}constant_sextload_v32i32_to_v32i64:

; GCN: s_load_dwordx16
; GCN-DAG: s_load_dwordx16

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

define amdgpu_kernel void @constant_sextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(4)* %in) #0 {
  %ld = load <32 x i32>, <32 x i32> addrspace(4)* %in
  %ext = sext <32 x i32> %ld to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v32i32_to_v32i64:
; GCN: s_load_dwordx16
; GCN: s_load_dwordx16

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4


; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @constant_zextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(4)* %in) #0 {
  %ld = load <32 x i32>, <32 x i32> addrspace(4)* %in
  %ext = zext <32 x i32> %ld to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; Plain 32-element load. Besides the load and store checks, no instruction
; mentioning accvgpr may appear at the point marked below.
; FUNC-LABEL: {{^}}constant_load_v32i32:
; GCN: s_load_dwordx16
; GCN: s_load_dwordx16

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOT: accvgpr

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @constant_load_v32i32(<32 x i32> addrspace(1)* %out, <32 x i32> addrspace(4)* %in) #0 {
  %ld = load <32 x i32>, <32 x i32> addrspace(4)* %in
  store <32 x i32> %ld, <32 x i32> addrspace(1)* %out
  ret void
}

attributes #0 = { nounwind }