; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s

;===------------------------------------------------------------------------===;
; GLOBAL ADDRESS SPACE
;===------------------------------------------------------------------------===;

; Load an i8 value from the global address space.
; FUNC-LABEL: {{^}}load_i8:
; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}

; SI: buffer_load_ubyte v{{[0-9]+}},
define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
  %1 = load i8, i8 addrspace(1)* %in
  %2 = zext i8 %1 to i32
  store i32 %2, i32 addrspace(1)* %out
  ret void
}

; Load an i8 value from the global address space and sign-extend it to i32.
; FUNC-LABEL: {{^}}load_i8_sext:
; R600: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
; R600: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST]], 0.0, literal
; R600: 8
; SI: buffer_load_sbyte
define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
  %0 = load i8, i8 addrspace(1)* %in
  %1 = sext i8 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load a <2 x i8> value from the global address space and zero-extend it
; to <2 x i32>.
; FUNC-LABEL: {{^}}load_v2i8:
; R600: VTX_READ_8
; R600: VTX_READ_8
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
define void @load_v2i8(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
entry:
  %0 = load <2 x i8>, <2 x i8> addrspace(1)* %in
  %1 = zext <2 x i8> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <2 x i8> value from the global address space and sign-extend it
; to <2 x i32>.
; FUNC-LABEL: {{^}}load_v2i8_sext:
; R600-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
; R600-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST_X]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST_Y]], 0.0, literal
; R600-DAG: 8
; R600-DAG: 8

; SI: buffer_load_sbyte
; SI: buffer_load_sbyte
define void @load_v2i8_sext(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
entry:
  %0 = load <2 x i8>, <2 x i8> addrspace(1)* %in
  %1 = sext <2 x i8> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i8> value from the global address space and zero-extend it
; to <4 x i32>.
; FUNC-LABEL: {{^}}load_v4i8:
; R600: VTX_READ_8
; R600: VTX_READ_8
; R600: VTX_READ_8
; R600: VTX_READ_8
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
define void @load_v4i8(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
entry:
  %0 = load <4 x i8>, <4 x i8> addrspace(1)* %in
  %1 = zext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i8> value from the global address space and sign-extend it
; to <4 x i32>.
; FUNC-LABEL: {{^}}load_v4i8_sext:
; R600-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
; R600-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
; R600-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
; R600-DAG: VTX_READ_8 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST_X]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST_Y]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST_Z]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST_W]], 0.0, literal
; R600-DAG: 8
; R600-DAG: 8
; R600-DAG: 8
; R600-DAG: 8
; SI: buffer_load_sbyte
; SI: buffer_load_sbyte
; SI: buffer_load_sbyte
; SI: buffer_load_sbyte
define void @load_v4i8_sext(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
entry:
  %0 = load <4 x i8>, <4 x i8> addrspace(1)* %in
  %1 = sext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}

; Load an i16 value from the global address space.
; FUNC-LABEL: {{^}}load_i16:
; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI: buffer_load_ushort
define void @load_i16(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
entry:
  %0 = load i16, i16 addrspace(1)* %in
  %1 = zext i16 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load an i16 value from the global address space and sign-extend it to i32.
; FUNC-LABEL: {{^}}load_i16_sext:
; R600: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
; R600: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST]], 0.0, literal
; R600: 16
; SI: buffer_load_sshort
define void @load_i16_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
entry:
  %0 = load i16, i16 addrspace(1)* %in
  %1 = sext i16 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load a <2 x i16> value from the global address space and zero-extend it
; to <2 x i32>.
; FUNC-LABEL: {{^}}load_v2i16:
; R600: VTX_READ_16
; R600: VTX_READ_16
; SI: buffer_load_ushort
; SI: buffer_load_ushort
define void @load_v2i16(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
entry:
  %0 = load <2 x i16>, <2 x i16> addrspace(1)* %in
  %1 = zext <2 x i16> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <2 x i16> value from the global address space and sign-extend it
; to <2 x i32>.
; FUNC-LABEL: {{^}}load_v2i16_sext:
; R600-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
; R600-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST_X]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST_Y]], 0.0, literal
; R600-DAG: 16
; R600-DAG: 16
; SI: buffer_load_sshort
; SI: buffer_load_sshort
define void @load_v2i16_sext(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
entry:
  %0 = load <2 x i16>, <2 x i16> addrspace(1)* %in
  %1 = sext <2 x i16> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i16> value from the global address space and zero-extend it
; to <4 x i32>.
; FUNC-LABEL: {{^}}load_v4i16:
; R600: VTX_READ_16
; R600: VTX_READ_16
; R600: VTX_READ_16
; R600: VTX_READ_16
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
define void @load_v4i16(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
entry:
  %0 = load <4 x i16>, <4 x i16> addrspace(1)* %in
  %1 = zext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i16> value from the global address space and sign-extend it
; to <4 x i32>.
; FUNC-LABEL: {{^}}load_v4i16_sext:
; R600-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
; R600-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
; R600-DAG: VTX_READ_16 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
; R600-DAG: VTX_READ_16 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST_X]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST_Y]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST_Z]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST_W]], 0.0, literal
; R600-DAG: 16
; R600-DAG: 16
; R600-DAG: 16
; R600-DAG: 16
; SI: buffer_load_sshort
; SI: buffer_load_sshort
; SI: buffer_load_sshort
; SI: buffer_load_sshort
define void @load_v4i16_sext(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
entry:
  %0 = load <4 x i16>, <4 x i16> addrspace(1)* %in
  %1 = sext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}

; Load an i32 value from the global address space.
; FUNC-LABEL: {{^}}load_i32:
; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0

; SI: buffer_load_dword v{{[0-9]+}}
define void @load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
  %0 = load i32, i32 addrspace(1)* %in
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; Load a f32 value from the global address space.
; FUNC-LABEL: {{^}}load_f32:
; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0

; SI: buffer_load_dword v{{[0-9]+}}
define void @load_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
entry:
  %0 = load float, float addrspace(1)* %in
  store float %0, float addrspace(1)* %out
  ret void
}

; Load a v2f32 value from the global address space.
; FUNC-LABEL: {{^}}load_v2f32:
; R600: MEM_RAT
; R600: VTX_READ_64
; SI: buffer_load_dwordx2
define void @load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
entry:
  %0 = load <2 x float>, <2 x float> addrspace(1)* %in
  store <2 x float> %0, <2 x float> addrspace(1)* %out
  ret void
}

; Load an i64 value from the global address space.
; FUNC-LABEL: {{^}}load_i64:
; R600: VTX_READ_64
; SI: buffer_load_dwordx2
define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
entry:
  %0 = load i64, i64 addrspace(1)* %in
  store i64 %0, i64 addrspace(1)* %out
  ret void
}

; Load an i32 value from the global address space and sign-extend it to i64.
; FUNC-LABEL: {{^}}load_i64_sext:
; R600: MEM_RAT
; R600: MEM_RAT
; R600: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.x
; R600: 31
; SI: buffer_load_dword

define void @load_i64_sext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
  %0 = load i32, i32 addrspace(1)* %in
  %1 = sext i32 %0 to i64
  store i64 %1, i64 addrspace(1)* %out
  ret void
}

; Load an i32 value from the global address space and zero-extend it to i64.
; FUNC-LABEL: {{^}}load_i64_zext:
; R600: MEM_RAT
; R600: MEM_RAT
define void @load_i64_zext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
  %0 = load i32, i32 addrspace(1)* %in
  %1 = zext i32 %0 to i64
  store i64 %1, i64 addrspace(1)* %out
  ret void
}

; Load a <8 x i32> value from the global address space.
; FUNC-LABEL: {{^}}load_v8i32:
; R600: VTX_READ_128
; R600: VTX_READ_128

; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
define void @load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) {
entry:
  %0 = load <8 x i32>, <8 x i32> addrspace(1)* %in
  store <8 x i32> %0, <8 x i32> addrspace(1)* %out
  ret void
}

; Load a <16 x i32> value from the global address space.
; FUNC-LABEL: {{^}}load_v16i32:
; R600: VTX_READ_128
; R600: VTX_READ_128
; R600: VTX_READ_128
; R600: VTX_READ_128

; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
define void @load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) {
entry:
  %0 = load <16 x i32>, <16 x i32> addrspace(1)* %in
  store <16 x i32> %0, <16 x i32> addrspace(1)* %out
  ret void
}

;===------------------------------------------------------------------------===;
; CONSTANT ADDRESS SPACE
;===------------------------------------------------------------------------===;

; Load a sign-extended i8 value
; FUNC-LABEL: {{^}}load_const_i8_sext:
; R600: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
; R600: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST]], 0.0, literal
; R600: 8
; SI: buffer_load_sbyte v{{[0-9]+}},
define void @load_const_i8_sext(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
  %0 = load i8, i8 addrspace(2)* %in
  %1 = sext i8 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load an aligned i8 value
; FUNC-LABEL: {{^}}load_const_i8_aligned:
; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI: buffer_load_ubyte v{{[0-9]+}},
define void @load_const_i8_aligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
  %0 = load i8, i8 addrspace(2)* %in
  %1 = zext i8 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load an un-aligned i8 value
; FUNC-LABEL: {{^}}load_const_i8_unaligned:
; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI: buffer_load_ubyte v{{[0-9]+}},
define void @load_const_i8_unaligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
  %0 = getelementptr i8, i8 addrspace(2)* %in, i32 1
  %1 = load i8, i8 addrspace(2)* %0
  %2 = zext i8 %1 to i32
  store i32 %2, i32 addrspace(1)* %out
  ret void
}

; Load a sign-extended i16 value
; FUNC-LABEL: {{^}}load_const_i16_sext:
; R600: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
; R600: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST]], 0.0, literal
; R600: 16
; SI: buffer_load_sshort
define void @load_const_i16_sext(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
  %0 = load i16, i16 addrspace(2)* %in
  %1 = sext i16 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load an aligned i16 value
; FUNC-LABEL: {{^}}load_const_i16_aligned:
; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI: buffer_load_ushort
define void @load_const_i16_aligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
  %0 = load i16, i16 addrspace(2)* %in
  %1 = zext i16 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load an un-aligned i16 value
; FUNC-LABEL: {{^}}load_const_i16_unaligned:
; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI: buffer_load_ushort
define void @load_const_i16_unaligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
  %0 = getelementptr i16, i16 addrspace(2)* %in, i32 1
  %1 = load i16, i16 addrspace(2)* %0
  %2 = zext i16 %1 to i32
  store i32 %2, i32 addrspace(1)* %out
  ret void
}

; Load an i32 value from the constant address space.
; FUNC-LABEL: {{^}}load_const_addrspace_i32:
; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0

; SI: s_load_dword s{{[0-9]+}}
define void @load_const_addrspace_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %0 = load i32, i32 addrspace(2)* %in
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; Load a f32 value from the constant address space.
; FUNC-LABEL: {{^}}load_const_addrspace_f32:
; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0

; SI: s_load_dword s{{[0-9]+}}
define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) {
  %1 = load float, float addrspace(2)* %in
  store float %1, float addrspace(1)* %out
  ret void
}

;===------------------------------------------------------------------------===;
; LOCAL ADDRESS SPACE
;===------------------------------------------------------------------------===;

; Load an i8 value from the local address space.
; FUNC-LABEL: {{^}}load_i8_local:
; R600: LDS_UBYTE_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_u8
define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
  %1 = load i8, i8 addrspace(3)* %in
  %2 = zext i8 %1 to i32
  store i32 %2, i32 addrspace(1)* %out
  ret void
}

; Load an i8 value from the local address space and sign-extend it to i32.
; FUNC-LABEL: {{^}}load_i8_sext_local:
; R600: LDS_UBYTE_READ_RET
; R600: BFE_INT
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_i8
define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
entry:
  %0 = load i8, i8 addrspace(3)* %in
  %1 = sext i8 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load a <2 x i8> value from the local address space and zero-extend it
; to <2 x i32>.
; FUNC-LABEL: {{^}}load_v2i8_local:
; R600: LDS_UBYTE_READ_RET
; R600: LDS_UBYTE_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_u8
; SI: ds_read_u8
define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
entry:
  %0 = load <2 x i8>, <2 x i8> addrspace(3)* %in
  %1 = zext <2 x i8> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <2 x i8> value from the local address space and sign-extend it
; to <2 x i32>.
; FUNC-LABEL: {{^}}load_v2i8_sext_local:
; R600-DAG: LDS_UBYTE_READ_RET
; R600-DAG: LDS_UBYTE_READ_RET
; R600-DAG: BFE_INT
; R600-DAG: BFE_INT
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_i8
; SI: ds_read_i8
define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
entry:
  %0 = load <2 x i8>, <2 x i8> addrspace(3)* %in
  %1 = sext <2 x i8> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i8> value from the local address space and zero-extend it
; to <4 x i32>.
; FUNC-LABEL: {{^}}load_v4i8_local:
; R600: LDS_UBYTE_READ_RET
; R600: LDS_UBYTE_READ_RET
; R600: LDS_UBYTE_READ_RET
; R600: LDS_UBYTE_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
define void @load_v4i8_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
entry:
  %0 = load <4 x i8>, <4 x i8> addrspace(3)* %in
  %1 = zext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i8> value from the local address space and sign-extend it
; to <4 x i32>.
; FUNC-LABEL: {{^}}load_v4i8_sext_local:
; R600-DAG: LDS_UBYTE_READ_RET
; R600-DAG: LDS_UBYTE_READ_RET
; R600-DAG: LDS_UBYTE_READ_RET
; R600-DAG: LDS_UBYTE_READ_RET
; R600-DAG: BFE_INT
; R600-DAG: BFE_INT
; R600-DAG: BFE_INT
; R600-DAG: BFE_INT
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_i8
; SI: ds_read_i8
; SI: ds_read_i8
; SI: ds_read_i8
define void @load_v4i8_sext_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
entry:
  %0 = load <4 x i8>, <4 x i8> addrspace(3)* %in
  %1 = sext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}

; Load an i16 value from the local address space.
; FUNC-LABEL: {{^}}load_i16_local:
; R600: LDS_USHORT_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_u16
define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
entry:
  %0 = load i16, i16 addrspace(3)* %in
  %1 = zext i16 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load an i16 value from the local address space and sign-extend it to i32.
; FUNC-LABEL: {{^}}load_i16_sext_local:
; R600: LDS_USHORT_READ_RET
; R600: BFE_INT
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_i16
define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
entry:
  %0 = load i16, i16 addrspace(3)* %in
  %1 = sext i16 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load a <2 x i16> value from the local address space and zero-extend it
; to <2 x i32>.
; FUNC-LABEL: {{^}}load_v2i16_local:
; R600: LDS_USHORT_READ_RET
; R600: LDS_USHORT_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_u16
; SI: ds_read_u16
define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
entry:
  %0 = load <2 x i16>, <2 x i16> addrspace(3)* %in
  %1 = zext <2 x i16> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <2 x i16> value from the local address space and sign-extend it
; to <2 x i32>.
; FUNC-LABEL: {{^}}load_v2i16_sext_local:
; R600-DAG: LDS_USHORT_READ_RET
; R600-DAG: LDS_USHORT_READ_RET
; R600-DAG: BFE_INT
; R600-DAG: BFE_INT
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_i16
; SI: ds_read_i16
define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
entry:
  %0 = load <2 x i16>, <2 x i16> addrspace(3)* %in
  %1 = sext <2 x i16> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i16> value from the local address space and zero-extend it
; to <4 x i32>.
; FUNC-LABEL: {{^}}load_v4i16_local:
; R600: LDS_USHORT_READ_RET
; R600: LDS_USHORT_READ_RET
; R600: LDS_USHORT_READ_RET
; R600: LDS_USHORT_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
define void @load_v4i16_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
entry:
  %0 = load <4 x i16>, <4 x i16> addrspace(3)* %in
  %1 = zext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i16> value from the local address space and sign-extend it
; to <4 x i32>.
; FUNC-LABEL: {{^}}load_v4i16_sext_local:
; R600-DAG: LDS_USHORT_READ_RET
; R600-DAG: LDS_USHORT_READ_RET
; R600-DAG: LDS_USHORT_READ_RET
; R600-DAG: LDS_USHORT_READ_RET
; R600-DAG: BFE_INT
; R600-DAG: BFE_INT
; R600-DAG: BFE_INT
; R600-DAG: BFE_INT
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_i16
; SI: ds_read_i16
; SI: ds_read_i16
; SI: ds_read_i16
define void @load_v4i16_sext_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
entry:
  %0 = load <4 x i16>, <4 x i16> addrspace(3)* %in
  %1 = sext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}

; Load an i32 value from the local address space.
; FUNC-LABEL: {{^}}load_i32_local:
; R600: LDS_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_b32
define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
  %0 = load i32, i32 addrspace(3)* %in
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; Load a f32 value from the local address space.
; FUNC-LABEL: {{^}}load_f32_local:
; R600: LDS_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_b32
define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
entry:
  %0 = load float, float addrspace(3)* %in
  store float %0, float addrspace(1)* %out
  ret void
}

; Load a v2f32 value from the local address space.
; FUNC-LABEL: {{^}}load_v2f32_local:
; R600: LDS_READ_RET
; R600: LDS_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_b64
define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
entry:
  %0 = load <2 x float>, <2 x float> addrspace(3)* %in
  store <2 x float> %0, <2 x float> addrspace(1)* %out
  ret void
}

; Test loading a i32 and v2i32 value from the same base pointer.
; The vector is read from element 2 of the pointer reinterpreted as
; <2 x i32>*, with only 4-byte alignment.
; FUNC-LABEL: {{^}}load_i32_v2i32_local:
; R600: LDS_READ_RET
; R600: LDS_READ_RET
; R600: LDS_READ_RET
; SI-DAG: ds_read_b32
; SI-DAG: ds_read2_b32
define void @load_i32_v2i32_local(<2 x i32> addrspace(1)* %out, i32 addrspace(3)* %in) {
  %scalar = load i32, i32 addrspace(3)* %in
  %tmp0 = bitcast i32 addrspace(3)* %in to <2 x i32> addrspace(3)*
  %vec_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(3)* %tmp0, i32 2
  %vec0 = load <2 x i32>, <2 x i32> addrspace(3)* %vec_ptr, align 4
  %vec1 = insertelement <2 x i32> <i32 0, i32 0>, i32 %scalar, i32 0
  %vec = add <2 x i32> %vec0, %vec1
  store <2 x i32> %vec, <2 x i32> addrspace(1)* %out
  ret void
}


@lds = addrspace(3) global [512 x i32] undef, align 4

; On SI we need to make sure that the base offset is a register and not
; an immediate.
; FUNC-LABEL: {{^}}load_i32_local_const_ptr:
; SI: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0
; SI: ds_read_b32 v{{[0-9]+}}, v[[ZERO]] offset:4
; R600: LDS_READ_RET
define void @load_i32_local_const_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
  ; Element 1 of @lds is a compile-time-constant local address; %in is unused.
  %tmp0 = getelementptr [512 x i32], [512 x i32] addrspace(3)* @lds, i32 0, i32 1
  %tmp1 = load i32, i32 addrspace(3)* %tmp0
  %tmp2 = getelementptr i32, i32 addrspace(1)* %out, i32 1
  store i32 %tmp1, i32 addrspace(1)* %tmp2
  ret void
}
