; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GCN-NOHSA,SI-NOHSA,FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GCN-HSA,GCNX3-HSA,FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GCN-NOHSA,GCNX3-NOHSA,FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=EG,FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GCN-HSA,FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GCN-HSA,FUNC %s

; Instruction-selection test for i32 loads from addrspace(1): plain scalar and
; vector loads, plus loads that are zero- or sign-extended to i64 before being
; stored back to addrspace(1).
;
; Check prefixes enabled by the run lines above:
;   FUNC         every run; used only for the per-function labels.
;   GCN          every amdgcn run (all runs except r600/redwood).
;   GCN-NOHSA    amdgcn runs without the amdhsa OS (default CPU, and tonga with
;                flat-for-global disabled); these checks expect buffer_*
;                instructions.
;   GCN-HSA      amdhsa runs (kaveri, gfx900, gfx908); these checks accept
;                either flat_* or global_* instructions.
;   SI-NOHSA     default-CPU amdgcn run only.
;   GCNX3-NOHSA  tonga run only.
;   GCNX3-HSA    kaveri run only (not enabled for the gfx900/gfx908 runs).
;   EG           r600 / redwood run.
;
; Every run passes -amdgpu-scalarize-global-loads=false; presumably this keeps
; the loads on the vector-memory path that the checks below expect (confirm
; against the option's definition in the AMDGPU backend).

; --- Plain loads ------------------------------------------------------------

; Single i32 load followed by a store of the loaded value.
; FUNC-LABEL: {{^}}global_load_i32:
; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}
; GCN-HSA: {{flat|global}}_load_dword

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
define amdgpu_kernel void @global_load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
entry:
  %ld = load i32, i32 addrspace(1)* %in
  store i32 %ld, i32 addrspace(1)* %out
  ret void
}

; <2 x i32> is expected to be loaded with a single 64-bit load.
; FUNC-LABEL: {{^}}global_load_v2i32:
; GCN-NOHSA: buffer_load_dwordx2
; GCN-HSA: {{flat|global}}_load_dwordx2

; EG: VTX_READ_64
define amdgpu_kernel void @global_load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #0 {
entry:
  %ld = load <2 x i32>, <2 x i32> addrspace(1)* %in
  store <2 x i32> %ld, <2 x i32> addrspace(1)* %out
  ret void
}

; <3 x i32>: the default-CPU run is expected to widen to a dwordx4 load, while
; the tonga and kaveri runs are expected to use a dwordx3 load.
; FUNC-LABEL: {{^}}global_load_v3i32:
; SI-NOHSA: buffer_load_dwordx4
; GCNX3-NOHSA: buffer_load_dwordx3
; GCNX3-HSA: {{flat|global}}_load_dwordx3

; EG: VTX_READ_128
define amdgpu_kernel void @global_load_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %in) #0 {
entry:
  %ld = load <3 x i32>, <3 x i32> addrspace(1)* %in
  store <3 x i32> %ld, <3 x i32> addrspace(1)* %out
  ret void
}

; <4 x i32> is expected to be loaded with a single 128-bit load.
; FUNC-LABEL: {{^}}global_load_v4i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4

; EG: VTX_READ_128
define amdgpu_kernel void @global_load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
entry:
  %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
  store <4 x i32> %ld, <4 x i32> addrspace(1)* %out
  ret void
}

; <8 x i32> is expected to be split into two 128-bit loads.
; FUNC-LABEL: {{^}}global_load_v8i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4

; EG: VTX_READ_128
; EG: VTX_READ_128
define amdgpu_kernel void @global_load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) #0 {
entry:
  %ld = load <8 x i32>, <8 x i32> addrspace(1)* %in
  store <8 x i32> %ld, <8 x i32> addrspace(1)* %out
  ret void
}

; <16 x i32> is expected to be split into four 128-bit loads.
; FUNC-LABEL: {{^}}global_load_v16i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4

; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_128
define amdgpu_kernel void @global_load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) #0 {
entry:
  %ld = load <16 x i32>, <16 x i32> addrspace(1)* %in
  store <16 x i32> %ld, <16 x i32> addrspace(1)* %out
  ret void
}

; --- Scalar and <1 x i32> extending loads -----------------------------------

; zext i32 -> i64: the low half comes from the load and the high half is a
; v_mov of 0; both halves are then stored as one 64-bit register pair.
; FUNC-LABEL: {{^}}global_zextload_i32_to_i64:
; GCN-NOHSA-DAG: buffer_load_dword v[[LO:[0-9]+]],
; GCN-HSA-DAG: {{flat|global}}_load_dword v[[LO:[0-9]+]],
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}

; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN-HSA: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}

; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
define amdgpu_kernel void @global_zextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %ld = load i32, i32 addrspace(1)* %in
  %ext = zext i32 %ld to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; sext i32 -> i64: the high half is the loaded value arithmetically shifted
; right by 31.
; FUNC-LABEL: {{^}}global_sextload_i32_to_i64:
; GCN-NOHSA: buffer_load_dword v[[LO:[0-9]+]]
; GCN-HSA: {{flat|global}}_load_dword v[[LO:[0-9]+]]
; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN-HSA: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}


; The register index accepts one or more digits so that registers above T9
; also match.
; EG: MEM_RAT
; EG: VTX_READ_32
; EG: ASHR {{[* ]*}}T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.
; EG: 31
define amdgpu_kernel void @global_sextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %ld = load i32, i32 addrspace(1)* %in
  %ext = sext i32 %ld to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; <1 x i32> zext: same load/store widths as the scalar case.
; FUNC-LABEL: {{^}}global_zextload_v1i32_to_v1i64:
; GCN-NOHSA: buffer_load_dword
; GCN-NOHSA: buffer_store_dwordx2

; GCN-HSA: {{flat|global}}_load_dword
; GCN-HSA: {{flat|global}}_store_dwordx2
define amdgpu_kernel void @global_zextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* %in) #0 {
  %ld = load <1 x i32>, <1 x i32> addrspace(1)* %in
  %ext = zext <1 x i32> %ld to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; <1 x i32> sext: same instruction sequence as the scalar sext case.
; FUNC-LABEL: {{^}}global_sextload_v1i32_to_v1i64:
; GCN-NOHSA: buffer_load_dword v[[LO:[0-9]+]]
; GCN-HSA: {{flat|global}}_load_dword v[[LO:[0-9]+]]
; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN-HSA: {{flat|global}}_store_dwordx2 v{{.+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @global_sextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* %in) #0 {
  %ld = load <1 x i32>, <1 x i32> addrspace(1)* %in
  %ext = sext <1 x i32> %ld to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; --- Vector extending loads -------------------------------------------------
; For the sign-extending variants one v_ashrrev_i32 is expected per element.

; FUNC-LABEL: {{^}}global_zextload_v2i32_to_v2i64:
; GCN-NOHSA: buffer_load_dwordx2
; GCN-NOHSA: buffer_store_dwordx4

; GCN-HSA: {{flat|global}}_load_dwordx2
; GCN-HSA: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_zextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #0 {
  %ld = load <2 x i32>, <2 x i32> addrspace(1)* %in
  %ext = zext <2 x i32> %ld to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v2i32_to_v2i64:
; GCN-NOHSA: buffer_load_dwordx2
; GCN-HSA: {{flat|global}}_load_dwordx2

; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_sextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #0 {
  %ld = load <2 x i32>, <2 x i32> addrspace(1)* %in
  %ext = sext <2 x i32> %ld to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v4i32_to_v4i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4

; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_zextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
  %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
  %ext = zext <4 x i32> %ld to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v4i32_to_v4i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4

; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_sextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
  %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
  %ext = sext <4 x i32> %ld to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v8i32_to_v8i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_zextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) #0 {
  %ld = load <8 x i32>, <8 x i32> addrspace(1)* %in
  %ext = zext <8 x i32> %ld to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v8i32_to_v8i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4

; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_sextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) #0 {
  %ld = load <8 x i32>, <8 x i32> addrspace(1)* %in
  %ext = sext <8 x i32> %ld to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v16i32_to_v16i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4


; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_sextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) #0 {
  %ld = load <16 x i32>, <16 x i32> addrspace(1)* %in
  %ext = sext <16 x i32> %ld to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v16i32_to_v16i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4

; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4

; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_zextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) #0 {
  %ld = load <16 x i32>, <16 x i32> addrspace(1)* %in
  %ext = zext <16 x i32> %ld to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; The eighth NOHSA load below is deliberately an order-independent (DAG) check,
; unlike the seven before it; presumably the scheduler may interleave it with
; the shifts that follow. Keep it as a DAG check unless the output is re-verified.
; FUNC-LABEL: {{^}}global_sextload_v32i32_to_v32i64:

; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA-DAG: buffer_load_dwordx4

; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4

; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32

; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4

; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4

; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4

; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

define amdgpu_kernel void @global_sextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* %in) #0 {
  %ld = load <32 x i32>, <32 x i32> addrspace(1)* %in
  %ext = sext <32 x i32> %ld to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v32i32_to_v32i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4


; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4


; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_zextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* %in) #0 {
  %ld = load <32 x i32>, <32 x i32> addrspace(1)* %in
  %ext = zext <32 x i32> %ld to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; --- Large plain load -------------------------------------------------------

; <32 x i32> copy: eight 128-bit loads are expected. The negative check on
; "accvgpr" rejects any instruction mentioning accumulator VGPRs in the region
; it guards; presumably this targets the gfx908 run (confirm against the
; change that introduced it).
; FUNC-LABEL: {{^}}global_load_v32i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4
; GCN-HSA: {{flat|global}}_load_dwordx4


; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOT: accvgpr

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @global_load_v32i32(<32 x i32> addrspace(1)* %out, <32 x i32> addrspace(1)* %in) #0 {
  %ld = load <32 x i32>, <32 x i32> addrspace(1)* %in
  store <32 x i32> %ld, <32 x i32> addrspace(1)* %out
  ret void
}

attributes #0 = { nounwind }