; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; Codegen checks for loads of i32 scalars and vectors from addrspace(1) that
; are zero- or sign-extended to i64 and stored back to addrspace(1).
;
; Both amdgcn runs (SI and tonga) share one set of instruction checks. The
; r600 run has no target-specific check lines in this file, so for it only the
; per-function label checks apply.
;
; In the sign-extension tests each element is expected to produce one
; v_ashrrev_i32; the scalar test shows the shift amount is 31. In the
; zero-extension tests only the scalar case pins the high half (a register
; set to 0); the vector cases only check the load/store widths.

; Scalar zext: low dword comes from the load, high dword is a register
; holding the constant 0, and both are stored as one 64-bit pair.
; FUNC-LABEL: {{^}}zextload_global_i32_to_i64:
; SI: buffer_load_dword v[[LO:[0-9]+]],
; SI: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %a = load i32, i32 addrspace(1)* %in
  %ext = zext i32 %a to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; Scalar sext: the high dword is derived from the loaded value by an
; arithmetic shift right of 31.
; FUNC-LABEL: {{^}}sextload_global_i32_to_i64:
; SI: buffer_load_dword [[LOAD:v[0-9]+]],
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
; SI: buffer_store_dwordx2
define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %a = load i32, i32 addrspace(1)* %in
  %ext = sext i32 %a to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; <1 x i32> zext: expected to use the same single-dword load and dwordx2
; store as the scalar case.
; FUNC-LABEL: {{^}}zextload_global_v1i32_to_v1i64:
; SI: buffer_load_dword
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @zextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* nocapture %in) nounwind {
  %load = load <1 x i32>, <1 x i32> addrspace(1)* %in
  %ext = zext <1 x i32> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; <1 x i32> sext: one load, one arithmetic shift, one dwordx2 store.
; FUNC-LABEL: {{^}}sextload_global_v1i32_to_v1i64:
; SI: buffer_load_dword
; SI: v_ashrrev_i32
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @sextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* nocapture %in) nounwind {
  %load = load <1 x i32>, <1 x i32> addrspace(1)* %in
  %ext = sext <1 x i32> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; <2 x i32> zext: one dwordx2 load, one dwordx4 store.
; FUNC-LABEL: {{^}}zextload_global_v2i32_to_v2i64:
; SI: buffer_load_dwordx2
; SI: buffer_store_dwordx4
; SI: s_endpgm
define void @zextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind {
  %load = load <2 x i32>, <2 x i32> addrspace(1)* %in
  %ext = zext <2 x i32> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; <2 x i32> sext: two shifts (one per element) and one dwordx4 store, matched
; in any relative order.
; FUNC-LABEL: {{^}}sextload_global_v2i32_to_v2i64:
; SI: buffer_load_dwordx2
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: buffer_store_dwordx4
; SI: s_endpgm
define void @sextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind {
  %load = load <2 x i32>, <2 x i32> addrspace(1)* %in
  %ext = sext <2 x i32> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; <4 x i32> zext: one dwordx4 load, two dwordx4 stores.
; FUNC-LABEL: {{^}}zextload_global_v4i32_to_v4i64:
; SI: buffer_load_dwordx4
; SI: buffer_store_dwordx4
; SI: buffer_store_dwordx4
; SI: s_endpgm
define void @zextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind {
  %load = load <4 x i32>, <4 x i32> addrspace(1)* %in
  %ext = zext <4 x i32> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; <4 x i32> sext: four shifts and two dwordx4 stores, matched in any
; relative order.
; FUNC-LABEL: {{^}}sextload_global_v4i32_to_v4i64:
; SI: buffer_load_dwordx4
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: buffer_store_dwordx4
; SI-DAG: buffer_store_dwordx4
; SI: s_endpgm
define void @sextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind {
  %load = load <4 x i32>, <4 x i32> addrspace(1)* %in
  %ext = sext <4 x i32> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; <8 x i32> zext: two dwordx4 loads, four dwordx4 stores.
; FUNC-LABEL: {{^}}zextload_global_v8i32_to_v8i64:
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI-DAG: buffer_store_dwordx4
; SI-DAG: buffer_store_dwordx4
; SI-DAG: buffer_store_dwordx4
; SI-DAG: buffer_store_dwordx4
; SI: s_endpgm
define void @zextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind {
  %load = load <8 x i32>, <8 x i32> addrspace(1)* %in
  %ext = zext <8 x i32> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; <8 x i32> sext: two dwordx4 loads, eight shifts, four dwordx4 stores.
; FUNC-LABEL: {{^}}sextload_global_v8i32_to_v8i64:
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4

; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: buffer_store_dwordx4
; SI-DAG: buffer_store_dwordx4
; SI-DAG: buffer_store_dwordx4
; SI-DAG: buffer_store_dwordx4
; SI: s_endpgm
define void @sextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind {
  %load = load <8 x i32>, <8 x i32> addrspace(1)* %in
  %ext = sext <8 x i32> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; <16 x i32> sext: four dwordx4 loads and sixteen shifts. Only four dwordx4
; stores are checked here, which is fewer than the eight checked by the
; matching zext test below.
; FUNC-LABEL: {{^}}sextload_global_v16i32_to_v16i64:
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4

; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: buffer_store_dwordx4

; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: buffer_store_dwordx4

; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: buffer_store_dwordx4

; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: buffer_store_dwordx4
; SI: s_endpgm
define void @sextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind {
  %load = load <16 x i32>, <16 x i32> addrspace(1)* %in
  %ext = sext <16 x i32> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; <16 x i32> zext: four dwordx4 loads followed by eight dwordx4 stores,
; matched in order.
; FUNC-LABEL: {{^}}zextload_global_v16i32_to_v16i64:
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4

; SI: buffer_store_dwordx4
; SI: buffer_store_dwordx4
; SI: buffer_store_dwordx4
; SI: buffer_store_dwordx4
; SI: buffer_store_dwordx4
; SI: buffer_store_dwordx4
; SI: buffer_store_dwordx4
; SI: buffer_store_dwordx4
; SI: s_endpgm
define void @zextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind {
  %load = load <16 x i32>, <16 x i32> addrspace(1)* %in
  %ext = zext <16 x i32> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; <32 x i32> sext: eight dwordx4 loads, thirty-two shifts, sixteen dwordx4
; stores; shifts and stores are matched in any relative order.
; FUNC-LABEL: {{^}}sextload_global_v32i32_to_v32i64:
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4


; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32

; SI-DAG: buffer_store_dwordx4
; SI-DAG: buffer_store_dwordx4
; SI-DAG: buffer_store_dwordx4
; SI-DAG: buffer_store_dwordx4

; SI-DAG: buffer_store_dwordx4
; SI-DAG: buffer_store_dwordx4
; SI-DAG: buffer_store_dwordx4
; SI-DAG: buffer_store_dwordx4

; SI-DAG: buffer_store_dwordx4
; SI-DAG: buffer_store_dwordx4
; SI-DAG: buffer_store_dwordx4
; SI-DAG: buffer_store_dwordx4

; SI-DAG: buffer_store_dwordx4
; SI-DAG: buffer_store_dwordx4
; SI-DAG: buffer_store_dwordx4
; SI-DAG: buffer_store_dwordx4

; SI: s_endpgm
define void @sextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind {
  %load = load <32 x i32>, <32 x i32> addrspace(1)* %in
  %ext = sext <32 x i32> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; <32 x i32> zext: eight dwordx4 loads, sixteen dwordx4 stores matched in any
; relative order.
; FUNC-LABEL: {{^}}zextload_global_v32i32_to_v32i64:
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4

; SI-DAG: buffer_store_dwordx4
; SI-DAG: buffer_store_dwordx4
; SI-DAG: buffer_store_dwordx4
; SI-DAG: buffer_store_dwordx4

; SI-DAG: buffer_store_dwordx4
; SI-DAG: buffer_store_dwordx4
; SI-DAG: buffer_store_dwordx4
; SI-DAG: buffer_store_dwordx4

; SI-DAG: buffer_store_dwordx4
; SI-DAG: buffer_store_dwordx4
; SI-DAG: buffer_store_dwordx4
; SI-DAG: buffer_store_dwordx4

; SI-DAG: buffer_store_dwordx4
; SI-DAG: buffer_store_dwordx4
; SI-DAG: buffer_store_dwordx4
; SI-DAG: buffer_store_dwordx4

; SI: s_endpgm
define void @zextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind {
  %load = load <32 x i32>, <32 x i32> addrspace(1)* %in
  %ext = zext <32 x i32> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}