; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; XUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FIXME: cypress is broken because the bigger testcases spill and it's not implemented

; Codegen test: zero- and sign-extending loads of i16 from the global
; address space (addrspace(1)), extended to i32 and i64, for scalar and
; vector widths v1 through v64.  The scalar cases check the exact load
; instruction selected (buffer_load_ushort for zext, buffer_load_sshort
; for sext); the wide vector cases only check that the kernel compiles
; to completion (s_endpgm), since the exact schedule is not interesting.
;
; NOTE(review): both active RUN lines use -check-prefix=SI, so the VI:
; checks in @sextload_global_i16_to_i64 below are never matched against
; any run — confirm the tonga (VI) output and add -check-prefix=VI to
; the second RUN line, or drop those dead checks.
; NOTE(review): the r600/cypress run is intentionally disabled (XUN)
; per the FIXME above, which also leaves the EG checks unused.

; === i16 -> i32 ===

; FUNC-LABEL: {{^}}zextload_global_i16_to_i32:
; SI: buffer_load_ushort
; SI: buffer_store_dword
; SI: s_endpgm
define amdgpu_kernel void @zextload_global_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
  %a = load i16, i16 addrspace(1)* %in
  %ext = zext i16 %a to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}sextload_global_i16_to_i32:
; SI: buffer_load_sshort
; SI: buffer_store_dword
; SI: s_endpgm
define amdgpu_kernel void @sextload_global_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
  %a = load i16, i16 addrspace(1)* %in
  %ext = sext i16 %a to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}zextload_global_v1i16_to_v1i32:
; SI: buffer_load_ushort
; SI: s_endpgm
define amdgpu_kernel void @zextload_global_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
  %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
  %ext = zext <1 x i16> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}sextload_global_v1i16_to_v1i32:
; SI: buffer_load_sshort
; SI: s_endpgm
define amdgpu_kernel void @sextload_global_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
  %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
  %ext = sext <1 x i16> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}zextload_global_v2i16_to_v2i32:
; SI: s_endpgm
define amdgpu_kernel void @zextload_global_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
  %ext = zext <2 x i16> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}sextload_global_v2i16_to_v2i32:
; SI: s_endpgm
define amdgpu_kernel void @sextload_global_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
  %ext = sext <2 x i16> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}zextload_global_v4i16_to_v4i32:
; SI: s_endpgm
define amdgpu_kernel void @zextload_global_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
  %ext = zext <4 x i16> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}sextload_global_v4i16_to_v4i32:
; SI: s_endpgm
define amdgpu_kernel void @sextload_global_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
  %ext = sext <4 x i16> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}zextload_global_v8i16_to_v8i32:
; SI: s_endpgm
define amdgpu_kernel void @zextload_global_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
  %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
  %ext = zext <8 x i16> %load to <8 x i32>
  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}sextload_global_v8i16_to_v8i32:
; SI: s_endpgm
define amdgpu_kernel void @sextload_global_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
  %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
  %ext = sext <8 x i16> %load to <8 x i32>
  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}zextload_global_v16i16_to_v16i32:
; SI: s_endpgm
define amdgpu_kernel void @zextload_global_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
  %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
  %ext = zext <16 x i16> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}sextload_global_v16i16_to_v16i32:
; SI: s_endpgm
define amdgpu_kernel void @sextload_global_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
  %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
  %ext = sext <16 x i16> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}zextload_global_v32i16_to_v32i32:
; SI: s_endpgm
define amdgpu_kernel void @zextload_global_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
  %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
  %ext = zext <32 x i16> %load to <32 x i32>
  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}sextload_global_v32i16_to_v32i32:
; SI: s_endpgm
define amdgpu_kernel void @sextload_global_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
  %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
  %ext = sext <32 x i16> %load to <32 x i32>
  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}zextload_global_v64i16_to_v64i32:
; SI: s_endpgm
define amdgpu_kernel void @zextload_global_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
  %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
  %ext = zext <64 x i16> %load to <64 x i32>
  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}sextload_global_v64i16_to_v64i32:
; SI: s_endpgm
define amdgpu_kernel void @sextload_global_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
  %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
  %ext = sext <64 x i16> %load to <64 x i32>
  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
  ret void
}

; === i16 -> i64 ===

; zext to i64: low dword is the ushort load, high dword is materialized 0.
; FUNC-LABEL: {{^}}zextload_global_i16_to_i64:
; SI-DAG: buffer_load_ushort v[[LO:[0-9]+]],
; SI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
define amdgpu_kernel void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
  %a = load i16, i16 addrspace(1)* %in
  %ext = zext i16 %a to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; NOTE(review): dead checks — no RUN line defines the VI prefix (see header).
; FUNC-LABEL: {{^}}sextload_global_i16_to_i64:
; VI: buffer_load_ushort [[LOAD:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0
; VI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
; VI: buffer_store_dwordx2 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0
define amdgpu_kernel void @sextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
  %a = load i16, i16 addrspace(1)* %in
  %ext = sext i16 %a to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}zextload_global_v1i16_to_v1i64:
; SI: s_endpgm
define amdgpu_kernel void @zextload_global_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
  %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
  %ext = zext <1 x i16> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}sextload_global_v1i16_to_v1i64:
; SI: s_endpgm
define amdgpu_kernel void @sextload_global_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
  %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
  %ext = sext <1 x i16> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}zextload_global_v2i16_to_v2i64:
; SI: s_endpgm
define amdgpu_kernel void @zextload_global_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
  %ext = zext <2 x i16> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}sextload_global_v2i16_to_v2i64:
; SI: s_endpgm
define amdgpu_kernel void @sextload_global_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
  %ext = sext <2 x i16> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}zextload_global_v4i16_to_v4i64:
; SI: s_endpgm
define amdgpu_kernel void @zextload_global_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
  %ext = zext <4 x i16> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}sextload_global_v4i16_to_v4i64:
; SI: s_endpgm
define amdgpu_kernel void @sextload_global_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
  %ext = sext <4 x i16> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}zextload_global_v8i16_to_v8i64:
; SI: s_endpgm
define amdgpu_kernel void @zextload_global_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
  %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
  %ext = zext <8 x i16> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}sextload_global_v8i16_to_v8i64:
; SI: s_endpgm
define amdgpu_kernel void @sextload_global_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
  %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
  %ext = sext <8 x i16> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}zextload_global_v16i16_to_v16i64:
; SI: s_endpgm
define amdgpu_kernel void @zextload_global_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
  %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
  %ext = zext <16 x i16> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}sextload_global_v16i16_to_v16i64:
; SI: s_endpgm
define amdgpu_kernel void @sextload_global_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
  %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
  %ext = sext <16 x i16> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}zextload_global_v32i16_to_v32i64:
; SI: s_endpgm
define amdgpu_kernel void @zextload_global_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
  %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
  %ext = zext <32 x i16> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}sextload_global_v32i16_to_v32i64:
; SI: s_endpgm
define amdgpu_kernel void @sextload_global_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
  %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
  %ext = sext <32 x i16> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}zextload_global_v64i16_to_v64i64:
; SI: s_endpgm
define amdgpu_kernel void @zextload_global_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
  %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
  %ext = zext <64 x i16> %load to <64 x i64>
  store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}sextload_global_v64i16_to_v64i64:
; SI: s_endpgm
define amdgpu_kernel void @sextload_global_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
  %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
  %ext = sext <64 x i16> %load to <64 x i64>
  store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
  ret void
}