; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s

; The same IR is compiled twice, once for gfx900 and once for fiji, and each
; run is checked with its own prefix set in addition to the shared one.
; The functions that follow load a scalar and insert it into element 0 of a
; two-element 16-bit vector.

; Load an i16 from addrspace(3) and insert it into element 0 of an undef
; vector. The gfx900 checks expect the d16 form of the load writing straight
; into the return register; the fiji checks only expect a plain 16-bit read.
; GCN-LABEL: {{^}}load_local_lo_v2i16_undeflo:
; GCN: s_waitcnt
; GFX9-NEXT: ds_read_u16_d16 v0, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: ds_read_u16
define <2 x i16> @load_local_lo_v2i16_undeflo(i16 addrspace(3)* %in) #0 {
entry:
  %load = load i16, i16 addrspace(3)* %in
  %build = insertelement <2 x i16> undef, i16 %load, i32 0
  ret <2 x i16> %build
}

; As above, but with an extra i16 argument.
; NOTE(review): %reg is inserted into element 0 and then immediately
; overwritten by the loaded value at the same index, so it never reaches the
; returned vector - confirm this is intended.
; GCN-LABEL: {{^}}load_local_lo_v2i16_reglo:
; GCN: s_waitcnt
; GFX9-NEXT: ds_read_u16_d16 v0, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: ds_read_u16
define <2 x i16> @load_local_lo_v2i16_reglo(i16 addrspace(3)* %in, i16 %reg) #0 {
entry:
  %load = load i16, i16 addrspace(3)* %in
  %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
  %build1 = insertelement <2 x i16> %build0, i16 %load, i32 0
  ret <2 x i16> %build1
}

; Show that we get reasonable regalloc without physreg constraints.
; The built vector is stored to an undef addrspace(1) pointer instead of being
; returned.
; NOTE(review): %reg is inserted into element 0 and then overwritten by the
; loaded value at the same index, so only the load reaches the store - confirm
; this is intended.
; GCN-LABEL: {{^}}load_local_lo_v2i16_reglo_vreg:
; GCN: s_waitcnt
; GFX9-NEXT: ds_read_u16_d16 v0, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v[0:1], v0, off{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: ds_read_u16
define void @load_local_lo_v2i16_reglo_vreg(i16 addrspace(3)* %in, i16 %reg) #0 {
entry:
  %load = load i16, i16 addrspace(3)* %in
  %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
  %build1 = insertelement <2 x i16> %build0, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Insert the loaded i16 into element 0 of a zeroinitializer vector. The gfx900
; checks expect the destination register to be zeroed before the d16 load.
; GCN-LABEL: {{^}}load_local_lo_v2i16_zerolo:
; GCN: s_waitcnt
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: ds_read_u16_d16 v1, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: v_mov_b32_e32 v0, v1
; GFX9-NEXT: s_setpc_b64

; VI: ds_read_u16 v
define <2 x i16> @load_local_lo_v2i16_zerolo(i16 addrspace(3)* %in) #0 {
entry:
  %load = load i16, i16 addrspace(3)* %in
  %build = insertelement <2 x i16> zeroinitializer, i16 %load, i32 0
  ret <2 x i16> %build
}

; Insert the loaded half into element 0 of the constant vector <0.0, 2.0>.
; The gfx900 checks expect the destination register to be initialised with a
; v_mov_b32 of 2.0 before the d16 load.
; GCN-LABEL: {{^}}load_local_lo_v2f16_fpimm:
; GCN: s_waitcnt
; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
; GFX9-NEXT: ds_read_u16_d16 v1, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: v_mov_b32_e32 v0, v1
; GFX9-NEXT: s_setpc_b64

; VI: ds_read_u16 v
define <2 x half> @load_local_lo_v2f16_fpimm(half addrspace(3)* %in) #0 {
entry:
  %load = load half, half addrspace(3)* %in
  %build = insertelement <2 x half> <half 0.0, half 2.0>, half %load, i32 0
  ret <2 x half> %build
}

; The base vector is an i32 argument bitcast to <2 x half>; the loaded half
; replaces element 0 and the result is stored.
; GCN-LABEL: {{^}}load_local_lo_v2f16_reghi_vreg:
; GCN: s_waitcnt
; GFX9-NEXT: ds_read_u16_d16 v1, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v[0:1], v1, off{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: ds_read_u16 v
define void @load_local_lo_v2f16_reghi_vreg(half addrspace(3)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x half>
  %load = load half, half addrspace(3)* %in
  %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
  store <2 x half> %build1, <2 x half> addrspace(1)* undef
  ret void
}

; Here %reg is a scalar half placed in element 1 of an undef vector and the
; load goes in element 0. The gfx900 checks expect a plain 16-bit read followed
; by an and/shift-or packing sequence rather than a d16 load.
; GCN-LABEL: {{^}}load_local_lo_v2f16_reglo_vreg:

; GFX9: ds_read_u16 v
; GFX9: v_and_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
; GFX9: v_lshl_or_b32 v{{[0-9]+}}, v{{[0-9]+}}, 16, v{{[0-9]+}}
; GFX9: global_store_dword

; VI: ds_read_u16 v
define void @load_local_lo_v2f16_reglo_vreg(half addrspace(3)* %in, half %reg) #0 {
entry:
  %load = load half, half addrspace(3)* %in
  %build0 = insertelement <2 x half> undef, half %reg, i32 1
  %build1 = insertelement <2 x half> %build0, half %load, i32 0
  store <2 x half> %build1, <2 x half> addrspace(1)* undef
  ret void
}

; An i8 load zero-extended to i16 replaces element 0 of a bitcast i32
; argument. The gfx900 checks expect the unsigned byte d16 load.
; GCN-LABEL: {{^}}load_local_lo_v2i16_reghi_vreg_zexti8:
; GCN: s_waitcnt
; GFX9-NEXT: ds_read_u8_d16 v1, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v[0:1], v1, off{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: ds_read_u8 v
define void @load_local_lo_v2i16_reghi_vreg_zexti8(i8 addrspace(3)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %load = load i8, i8 addrspace(3)* %in
  %ext = zext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Scalar i16 %reg goes in element 1, the zero-extended i8 load in element 0.
; The gfx900 checks expect a plain (non-d16) unsigned byte read here.
; GCN-LABEL: {{^}}load_local_lo_v2i16_reglo_vreg_zexti8:
; GCN: s_waitcnt
; GFX9: ds_read_u8 v
; GFX9: global_store_dword
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: ds_read_u8 v
define void @load_local_lo_v2i16_reglo_vreg_zexti8(i8 addrspace(3)* %in, i16 %reg) #0 {
entry:
  %load = load i8, i8 addrspace(3)* %in
  %ext = zext i8 %load to i16
  %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1
  %build1 = insertelement <2 x i16> %build0, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Sign-extending counterpart of the bitcast-i32 byte test: the gfx900 checks
; expect the signed byte d16 load.
; GCN-LABEL: {{^}}load_local_lo_v2i16_reghi_vreg_sexti8:
; GCN: s_waitcnt
; GFX9-NEXT: ds_read_i8_d16 v1, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v[0:1], v1, off{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: ds_read_i8 v
define void @load_local_lo_v2i16_reghi_vreg_sexti8(i8 addrspace(3)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %load = load i8, i8 addrspace(3)* %in
  %ext = sext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Scalar i16 %reg goes in element 1, the sign-extended i8 load in element 0.
; The gfx900 checks expect a plain signed byte read plus and/shift-or packing.
; GCN-LABEL: {{^}}load_local_lo_v2i16_reglo_vreg_sexti8:
; GCN: s_waitcnt
; GFX9: ds_read_i8 v
; GFX9: v_and_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
; GFX9: v_lshl_or_b32 v{{[0-9]+}}, v{{[0-9]+}}, 16, v{{[0-9]+}}

; VI: ds_read_i8 v
define void @load_local_lo_v2i16_reglo_vreg_sexti8(i8 addrspace(3)* %in, i16 %reg) #0 {
entry:
  %load = load i8, i8 addrspace(3)* %in
  %ext = sext i8 %load to i16
  %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1
  %build1 = insertelement <2 x i16> %build0, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; addrspace(1) load at a GEP of -2047 i16 elements (-4094 bytes); the gfx900
; checks expect that to appear as the instruction's immediate offset.
; GCN-LABEL: {{^}}load_global_lo_v2i16_reglo_vreg:
; GCN: s_waitcnt
; GFX9-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64
define void @load_global_lo_v2i16_reglo_vreg(i16 addrspace(1)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %gep = getelementptr inbounds i16, i16 addrspace(1)* %in, i64 -2047
  %load = load i16, i16 addrspace(1)* %gep
  %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Half-typed version of the addrspace(1) test with the same -2047 element GEP.
; GCN-LABEL: {{^}}load_global_lo_v2f16_reglo_vreg:
; GCN: s_waitcnt
; GFX9-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64
define void @load_global_lo_v2f16_reglo_vreg(half addrspace(1)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x half>
  %gep = getelementptr inbounds half, half addrspace(1)* %in, i64 -2047
  %load = load half, half addrspace(1)* %gep
  %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
  store <2 x half> %build1, <2 x half> addrspace(1)* undef
  ret void
}

; addrspace(1) i8 load at byte offset -4095, zero-extended to i16.
; GCN-LABEL: {{^}}load_global_lo_v2i16_reglo_vreg_zexti8:
; GCN: s_waitcnt
; GFX9-NEXT: global_load_ubyte_d16 v2, v[0:1], off offset:-4095
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64
define void @load_global_lo_v2i16_reglo_vreg_zexti8(i8 addrspace(1)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %gep = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 -4095
  %load = load i8, i8 addrspace(1)* %gep
  %ext = zext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; addrspace(1) i8 load at byte offset -4095, sign-extended to i16.
; GCN-LABEL: {{^}}load_global_lo_v2i16_reglo_vreg_sexti8:
; GCN: s_waitcnt
; GFX9-NEXT: global_load_sbyte_d16 v2, v[0:1], off offset:-4095
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64
define void @load_global_lo_v2i16_reglo_vreg_sexti8(i8 addrspace(1)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %gep = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 -4095
  %load = load i8, i8 addrspace(1)* %gep
  %ext = sext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Load through a pointer with no address space qualifier. The gfx900 checks
; expect the flat d16 load; the fiji checks expect a plain load plus an OR.
; GCN-LABEL: {{^}}load_flat_lo_v2i16_reghi_vreg:
; GCN: s_waitcnt
; GFX9-NEXT: flat_load_short_d16 v2, v[0:1]
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v[0:1], v2
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: flat_load_ushort v{{[0-9]+}}
; VI: v_or_b32_e32
define void @load_flat_lo_v2i16_reghi_vreg(i16* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %load = load i16, i16* %in
  %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Half-typed version of the unqualified-pointer test.
; GCN-LABEL: {{^}}load_flat_lo_v2f16_reghi_vreg:
; GCN: s_waitcnt
; GFX9-NEXT: flat_load_short_d16 v2, v[0:1]
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v[0:1], v2
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: flat_load_ushort v{{[0-9]+}}
; VI: v_or_b32_e32
define void @load_flat_lo_v2f16_reghi_vreg(half* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x half>
  %load = load half, half* %in
  %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
  store <2 x half> %build1, <2 x half> addrspace(1)* undef
  ret void
}

; Unqualified-pointer i8 load, zero-extended to i16.
; GCN-LABEL: {{^}}load_flat_lo_v2i16_reglo_vreg_zexti8:
; GCN: s_waitcnt
; GFX9-NEXT: flat_load_ubyte_d16 v2, v[0:1]
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v[0:1], v2
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: flat_load_ubyte v{{[0-9]+}}
; VI: v_or_b32_e32
define void @load_flat_lo_v2i16_reglo_vreg_zexti8(i8* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %load = load i8, i8* %in
  %ext = zext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Unqualified-pointer i8 load, sign-extended to i16. The fiji checks expect
; the merge to be done with an SDWA form of the OR.
; GCN-LABEL: {{^}}load_flat_lo_v2i16_reglo_vreg_sexti8:
; GCN: s_waitcnt
; GFX9-NEXT: flat_load_sbyte_d16 v2, v[0:1]
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v[0:1], v2
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: flat_load_sbyte v{{[0-9]+}}
; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD

define void @load_flat_lo_v2i16_reglo_vreg_sexti8(i8* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %load = load i8, i8* %in
  %ext = sext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; addrspace(5) load through a byval pointer, GEP of 2045 i16 elements. The
; checks expect a buffer load with s5 as the scalar offset and an immediate
; offset of 4094.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg:
; GCN: s_waitcnt
; GFX9: buffer_load_short_d16 v0, off, s[0:3], s5 offset:4094{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s5 offset:4094{{$}}
define void @load_private_lo_v2i16_reglo_vreg(i16 addrspace(5)* byval %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %gep = getelementptr inbounds i16, i16 addrspace(5)* %in, i64 2045
  %load = load i16, i16 addrspace(5)* %gep
  %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Scalar i16 %reg goes in element 1 and the addrspace(5) load in element 0.
; The gfx900 checks expect a plain 16-bit buffer load plus and/shift-or
; packing rather than a d16 load.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reghi_vreg:
; GCN: s_waitcnt
; GFX9: buffer_load_ushort v1, off, s[0:3], s5 offset:4094{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9: v_and_b32
; GFX9: v_lshl_or_b32

; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s5 offset:4094{{$}}
define void @load_private_lo_v2i16_reghi_vreg(i16 addrspace(5)* byval %in, i16 %reg) #0 {
entry:
  %gep = getelementptr inbounds i16, i16 addrspace(5)* %in, i64 2045
  %load = load i16, i16 addrspace(5)* %gep
  %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1
  %build1 = insertelement <2 x i16> %build0, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Half-typed version of the byval addrspace(5) test.
; GCN-LABEL: {{^}}load_private_lo_v2f16_reglo_vreg:
; GCN: s_waitcnt
; GFX9: buffer_load_short_d16 v0, off, s[0:3], s5 offset:4094{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s5 offset:4094{{$}}
define void @load_private_lo_v2f16_reglo_vreg(half addrspace(5)* byval %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x half>
  %gep = getelementptr inbounds half, half addrspace(5)* %in, i64 2045
  %load = load half, half addrspace(5)* %gep
  %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
  store <2 x half> %build1, <2 x half> addrspace(1)* undef
  ret void
}

; Volatile addrspace(5) load from the constant address 4094 (built with
; inttoptr); the %in argument is not used. The checks expect s4 as the scalar
; offset with an immediate offset of 4094.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_nooff:
; GCN: s_waitcnt
; GFX9-NEXT: buffer_load_short_d16 v1, off, s[0:3], s4 offset:4094{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s4 offset:4094{{$}}
define void @load_private_lo_v2i16_reglo_vreg_nooff(i16 addrspace(5)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 4094 to i16 addrspace(5)*)
  %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; NOTE(review): apart from the function name, this body and its checks are
; identical to load_private_lo_v2i16_reglo_vreg_nooff - confirm whether a
; distinct variant was intended.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reghi_vreg_nooff:
; GCN: s_waitcnt
; GFX9-NEXT: buffer_load_short_d16 v1, off, s[0:3], s4 offset:4094{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s4 offset:4094{{$}}
define void @load_private_lo_v2i16_reghi_vreg_nooff(i16 addrspace(5)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 4094 to i16 addrspace(5)*)
  %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Half-typed version of the constant-address volatile addrspace(5) test.
; GCN-LABEL: {{^}}load_private_lo_v2f16_reglo_vreg_nooff:
; GCN: s_waitcnt
; GFX9-NEXT: buffer_load_short_d16 v1, off, s[0:3], s4 offset:4094{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s4 offset:4094{{$}}
define void @load_private_lo_v2f16_reglo_vreg_nooff(half addrspace(5)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x half>
  %load = load volatile half, half addrspace(5)* inttoptr (i32 4094 to half addrspace(5)*)
  %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
  store <2 x half> %build1, <2 x half> addrspace(1)* undef
  ret void
}

; Byval addrspace(5) i8 load at a GEP of 4091 bytes, zero-extended to i16.
; The checks expect an immediate offset of 4095.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_zexti8:
; GCN: s_waitcnt
; GFX9: buffer_load_ubyte_d16 v0, off, s[0:3], s5 offset:4095{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_ubyte v{{[0-9]+}}, off, s[0:3], s5 offset:4095{{$}}
define void @load_private_lo_v2i16_reglo_vreg_zexti8(i8 addrspace(5)* byval %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %gep = getelementptr inbounds i8, i8 addrspace(5)* %in, i64 4091
  %load = load i8, i8 addrspace(5)* %gep
  %ext = zext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Byval addrspace(5) i8 load at a GEP of 4091 bytes, sign-extended to i16.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_sexti8:
; GCN: s_waitcnt
; GFX9: buffer_load_sbyte_d16 v0, off, s[0:3], s5 offset:4095{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_sbyte v{{[0-9]+}}, off, s[0:3], s5 offset:4095{{$}}
define void @load_private_lo_v2i16_reglo_vreg_sexti8(i8 addrspace(5)* byval %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %gep = getelementptr inbounds i8, i8 addrspace(5)* %in, i64 4091
  %load = load i8, i8 addrspace(5)* %gep
  %ext = sext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Volatile addrspace(5) i8 load from the constant address 4094,
; zero-extended to i16; %in is not used.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_nooff_zexti8:
; GCN: s_waitcnt
; GFX9-NEXT: buffer_load_ubyte_d16 v1, off, s[0:3], s4 offset:4094{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_ubyte v0, off, s[0:3], s4 offset:4094{{$}}
define void @load_private_lo_v2i16_reglo_vreg_nooff_zexti8(i8 addrspace(5)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*)
  %ext = zext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Volatile addrspace(5) i8 load from the constant address 4094,
; sign-extended to i16; %in is not used.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_nooff_sexti8:
; GCN: s_waitcnt
; GFX9-NEXT: buffer_load_sbyte_d16 v1, off, s[0:3], s4 offset:4094{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_sbyte v0, off, s[0:3], s4 offset:4094{{$}}
define void @load_private_lo_v2i16_reglo_vreg_nooff_sexti8(i8 addrspace(5)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*)
  %ext = sext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; As the zero-extending constant-address byte test, but the extended i16 is
; bitcast to half and inserted into a <2 x half> vector.
; GCN-LABEL: {{^}}load_private_lo_v2f16_reglo_vreg_nooff_zexti8:
; GCN: s_waitcnt
; GFX9-NEXT: buffer_load_ubyte_d16 v1, off, s[0:3], s4 offset:4094{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_ubyte v0, off, s[0:3], s4 offset:4094{{$}}
define void @load_private_lo_v2f16_reglo_vreg_nooff_zexti8(i8 addrspace(5)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x half>
  %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*)
  %ext = zext i8 %load to i16
  %bc.ext = bitcast i16 %ext to half
  %build1 = insertelement <2 x half> %reg.bc, half %bc.ext, i32 0
  store <2 x half> %build1, <2 x half> addrspace(1)* undef
  ret void
}

; addrspace(4) load at a GEP of -2047 i16 elements (-4094 bytes). The gfx900
; checks expect the same global d16 load as the addrspace(1) case; the fiji
; checks expect a flat load.
; GCN-LABEL: {{^}}load_constant_lo_v2i16_reglo_vreg:
; GCN: s_waitcnt
; GFX9-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: flat_load_ushort
define void @load_constant_lo_v2i16_reglo_vreg(i16 addrspace(4)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %gep = getelementptr inbounds i16, i16 addrspace(4)* %in, i64 -2047
  %load = load i16, i16 addrspace(4)* %gep
  %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Half-typed version of the addrspace(4) test. The label check is anchored
; with {{^}} and a trailing colon, like every other function in this file, so
; it can only match the function's own label line and not another mention of
; the symbol name in the output.
; GCN-LABEL: {{^}}load_constant_lo_v2f16_reglo_vreg:
; GCN: s_waitcnt
; GFX9-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: flat_load_ushort
define void @load_constant_lo_v2f16_reglo_vreg(half addrspace(4)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x half>
  %gep = getelementptr inbounds half, half addrspace(4)* %in, i64 -2047
  %load = load half, half addrspace(4)* %gep
  %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
  store <2 x half> %build1, <2 x half> addrspace(1)* undef
  ret void
}

; Two addrspace(5) allocas: a volatile store of 123 goes to the first and a
; volatile i16 load reads element 2025 of the second. The gfx900 checks expect
; the store to be followed directly by a d16 buffer load with an immediate
; offset of 4094.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_to_offset:
; GFX9: buffer_store_dword
; GFX9-NEXT: buffer_load_short_d16 v0, off, s[0:3], s5 offset:4094

; VI: buffer_load_ushort v
define void @load_private_lo_v2i16_reglo_vreg_to_offset(i32 %reg) #0 {
entry:
  %obj0 = alloca [10 x i32], align 4, addrspace(5)
  %obj1 = alloca [4096 x i16], align 2, addrspace(5)
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
  store volatile i32 123, i32 addrspace(5)* %bc
  %gep = getelementptr inbounds [4096 x i16], [4096 x i16] addrspace(5)* %obj1, i32 0, i32 2025
  %load = load volatile i16, i16 addrspace(5)* %gep
  %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Byte-array variant of the two-alloca test: a volatile i8 load of element
; 4051 is sign-extended to i16. The gfx900 checks expect an immediate offset
; of 4095.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_sexti8_to_offset:
; GFX9: buffer_store_dword
; GFX9-NEXT: buffer_load_sbyte_d16 v0, off, s[0:3], s5 offset:4095

; VI: buffer_load_sbyte v
define void @load_private_lo_v2i16_reglo_vreg_sexti8_to_offset(i32 %reg) #0 {
entry:
  %obj0 = alloca [10 x i32], align 4, addrspace(5)
  %obj1 = alloca [4096 x i8], align 2, addrspace(5)
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
  store volatile i32 123, i32 addrspace(5)* %bc
  %gep = getelementptr inbounds [4096 x i8], [4096 x i8] addrspace(5)* %obj1, i32 0, i32 4051
  %load = load volatile i8, i8 addrspace(5)* %gep
  %load.ext = sext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %load.ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Zero-extending counterpart of the byte-array two-alloca test.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_zexti8_to_offset:
; GFX9: buffer_store_dword
; GFX9-NEXT: buffer_load_ubyte_d16 v0, off, s[0:3], s5 offset:4095

; VI: buffer_load_ubyte v
define void @load_private_lo_v2i16_reglo_vreg_zexti8_to_offset(i32 %reg) #0 {
entry:
  %obj0 = alloca [10 x i32], align 4, addrspace(5)
  %obj1 = alloca [4096 x i8], align 2, addrspace(5)
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
  store volatile i32 123, i32 addrspace(5)* %bc
  %gep = getelementptr inbounds [4096 x i8], [4096 x i8] addrspace(5)* %obj1, i32 0, i32 4051
  %load = load volatile i8, i8 addrspace(5)* %gep
  %load.ext = zext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %load.ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Attribute group shared by every function above.
attributes #0 = { nounwind }