; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK --check-prefix=FUNC %s
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=R600-CHECK --check-prefix=FUNC %s
; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=FUNC %s

;===------------------------------------------------------------------------===;
; GLOBAL ADDRESS SPACE
;===------------------------------------------------------------------------===;

; Load an i8 value from the global address space.
; The loaded byte is zero-extended to i32 before it is stored to %out.
; FUNC-LABEL: @load_i8
; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}

; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
  %1 = load i8 addrspace(1)* %in
  %2 = zext i8 %1 to i32
  store i32 %2, i32 addrspace(1)* %out
  ret void
}

; Load an i8 value from the global address space and sign-extend it to i32.
; The R600 patterns expect an LSHL feeding an ASHR (through PV), each followed
; by the literal 24.
; FUNC-LABEL: @load_i8_sext
; R600-CHECK: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
; R600-CHECK: 24
; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
; R600-CHECK: 24
; SI-CHECK: BUFFER_LOAD_SBYTE
define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
  %0 = load i8 addrspace(1)* %in
  %1 = sext i8 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load a <2 x i8> vector from the global address space and zero-extend it to
; <2 x i32>. One byte read per element is expected.
; FUNC-LABEL: @load_v2i8
; R600-CHECK: VTX_READ_8
; R600-CHECK: VTX_READ_8
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
define void @load_v2i8(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
entry:
  %0 = load <2 x i8> addrspace(1)* %in
  %1 = zext <2 x i8> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <2 x i8> vector from the global address space and sign-extend it to
; <2 x i32>. The R600 patterns are order-independent (DAG) and expect one
; LSHL/ASHR pair with the literal 24 per element.
; FUNC-LABEL: @load_v2i8_sext
; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
; R600-CHECK-DAG: 24
; SI-CHECK: BUFFER_LOAD_SBYTE
; SI-CHECK: BUFFER_LOAD_SBYTE
define void @load_v2i8_sext(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
entry:
  %0 = load <2 x i8> addrspace(1)* %in
  %1 = sext <2 x i8> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i8> vector from the global address space and zero-extend it to
; <4 x i32>. One byte read per element is expected.
; FUNC-LABEL: @load_v4i8
; R600-CHECK: VTX_READ_8
; R600-CHECK: VTX_READ_8
; R600-CHECK: VTX_READ_8
; R600-CHECK: VTX_READ_8
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
define void @load_v4i8(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
entry:
  %0 = load <4 x i8> addrspace(1)* %in
  %1 = zext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i8> vector from the global address space and sign-extend it to
; <4 x i32>. The R600 patterns are order-independent (DAG) and expect one
; LSHL/ASHR pair with the literal 24 per element.
; FUNC-LABEL: @load_v4i8_sext
; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
; R600-CHECK-DAG: VTX_READ_8 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Z_CHAN:[XYZW]]], [[DST_Z]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Z_CHAN]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_W_CHAN:[XYZW]]], [[DST_W]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]]
; R600-CHECK-DAG: 24
; SI-CHECK: BUFFER_LOAD_SBYTE
; SI-CHECK: BUFFER_LOAD_SBYTE
; SI-CHECK: BUFFER_LOAD_SBYTE
; SI-CHECK: BUFFER_LOAD_SBYTE
define void @load_v4i8_sext(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
entry:
  %0 = load <4 x i8> addrspace(1)* %in
  %1 = sext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}

; Load an i16 value from the global address space.
; The loaded halfword is zero-extended to i32 before it is stored to %out.
; FUNC-LABEL: @load_i16
; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI-CHECK: BUFFER_LOAD_USHORT
define void @load_i16(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
entry:
  %0 = load i16 addrspace(1)* %in
  %1 = zext i16 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load an i16 value from the global address space and sign-extend it to i32.
; The R600 patterns expect an LSHL feeding an ASHR (through PV), each followed
; by the literal 16.
; FUNC-LABEL: @load_i16_sext
; R600-CHECK: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
; R600-CHECK: 16
; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
; R600-CHECK: 16
; SI-CHECK: BUFFER_LOAD_SSHORT
define void @load_i16_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
entry:
  %0 = load i16 addrspace(1)* %in
  %1 = sext i16 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load a <2 x i16> vector from the global address space and zero-extend it to
; <2 x i32>. One 16-bit read per element is expected.
; FUNC-LABEL: @load_v2i16
; R600-CHECK: VTX_READ_16
; R600-CHECK: VTX_READ_16
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
define void @load_v2i16(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
entry:
  %0 = load <2 x i16> addrspace(1)* %in
  %1 = zext <2 x i16> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <2 x i16> vector from the global address space and sign-extend it to
; <2 x i32>. The R600 patterns are order-independent (DAG) and expect one
; LSHL/ASHR pair with the literal 16 per element.
; FUNC-LABEL: @load_v2i16_sext
; R600-CHECK-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
; R600-CHECK-DAG: 16
; SI-CHECK: BUFFER_LOAD_SSHORT
; SI-CHECK: BUFFER_LOAD_SSHORT
define void @load_v2i16_sext(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
entry:
  %0 = load <2 x i16> addrspace(1)* %in
  %1 = sext <2 x i16> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i16> vector from the global address space and zero-extend it to
; <4 x i32>. One 16-bit read per element is expected.
; FUNC-LABEL: @load_v4i16
; R600-CHECK: VTX_READ_16
; R600-CHECK: VTX_READ_16
; R600-CHECK: VTX_READ_16
; R600-CHECK: VTX_READ_16
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
define void @load_v4i16(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
entry:
  %0 = load <4 x i16> addrspace(1)* %in
  %1 = zext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i16> vector from the global address space and sign-extend it to
; <4 x i32>. The R600 patterns are order-independent (DAG) and expect one
; LSHL/ASHR pair with the literal 16 per element.
; FUNC-LABEL: @load_v4i16_sext
; R600-CHECK-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: VTX_READ_16 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
; R600-CHECK-DAG: VTX_READ_16 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Z_CHAN:[XYZW]]], [[DST_Z]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Z_CHAN]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_W_CHAN:[XYZW]]], [[DST_W]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]]
; R600-CHECK-DAG: 16
; SI-CHECK: BUFFER_LOAD_SSHORT
; SI-CHECK: BUFFER_LOAD_SSHORT
; SI-CHECK: BUFFER_LOAD_SSHORT
; SI-CHECK: BUFFER_LOAD_SSHORT
define void @load_v4i16_sext(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
entry:
  %0 = load <4 x i16> addrspace(1)* %in
  %1 = sext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}

; Load an i32 value from the global address space.
; The value is stored to %out unchanged.
; FUNC-LABEL: @load_i32
; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0

; SI-CHECK: BUFFER_LOAD_DWORD v{{[0-9]+}}
define void @load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
  %0 = load i32 addrspace(1)* %in
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; Load an f32 value from the global address space.
; FUNC-LABEL: @load_f32
; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0

; SI-CHECK: BUFFER_LOAD_DWORD v{{[0-9]+}}
define void @load_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
entry:
  %0 = load float addrspace(1)* %in
  store float %0, float addrspace(1)* %out
  ret void
}

; Load a v2f32 value from the global address space.
; FUNC-LABEL: @load_v2f32
; R600-CHECK: VTX_READ_64

; SI-CHECK: BUFFER_LOAD_DWORDX2
define void @load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
entry:
  %0 = load <2 x float> addrspace(1)* %in
  store <2 x float> %0, <2 x float> addrspace(1)* %out
  ret void
}

; Load an i64 value from the global address space and store it back unchanged.
; FUNC-LABEL: @load_i64
; R600-CHECK: MEM_RAT
; R600-CHECK: MEM_RAT

; SI-CHECK: BUFFER_LOAD_DWORDX2
define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
entry:
  %0 = load i64 addrspace(1)* %in
  store i64 %0, i64 addrspace(1)* %out
  ret void
}

; Load an i32 value from the global address space and sign-extend it to i64.
; The R600 patterns expect an ASHR with a literal operand, followed by 31.
; FUNC-LABEL: @load_i64_sext
; R600-CHECK: MEM_RAT
; R600-CHECK: MEM_RAT
; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.x
; R600-CHECK: 31
; SI-CHECK: BUFFER_LOAD_DWORD

define void @load_i64_sext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
  %0 = load i32 addrspace(1)* %in
  %1 = sext i32 %0 to i64
  store i64 %1, i64 addrspace(1)* %out
  ret void
}

; Load an i32 value from the global address space and zero-extend it to i64.
; Only the R600 output is verified for this function.
; FUNC-LABEL: @load_i64_zext
; R600-CHECK: MEM_RAT
; R600-CHECK: MEM_RAT
define void @load_i64_zext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
  %0 = load i32 addrspace(1)* %in
  %1 = zext i32 %0 to i64
  store i64 %1, i64 addrspace(1)* %out
  ret void
}

; Load a <8 x i32> vector from the global address space and store it back
; unchanged.
; FUNC-LABEL: @load_v8i32
; R600-CHECK: VTX_READ_128
; R600-CHECK: VTX_READ_128
; XXX: We should be using DWORDX4 instructions on SI.
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
define void @load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) {
entry:
  %0 = load <8 x i32> addrspace(1)* %in
  store <8 x i32> %0, <8 x i32> addrspace(1)* %out
  ret void
}

; Load a <16 x i32> vector from the global address space and store it back
; unchanged.
; FUNC-LABEL: @load_v16i32
; R600-CHECK: VTX_READ_128
; R600-CHECK: VTX_READ_128
; R600-CHECK: VTX_READ_128
; R600-CHECK: VTX_READ_128
; XXX: We should be using DWORDX4 instructions on SI.
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
define void @load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) {
entry:
  %0 = load <16 x i32> addrspace(1)* %in
  store <16 x i32> %0, <16 x i32> addrspace(1)* %out
  ret void
}

;===------------------------------------------------------------------------===;
; CONSTANT ADDRESS SPACE
;===------------------------------------------------------------------------===;

; Load a sign-extended i8 value
; FUNC-LABEL: @load_const_i8_sext
; R600-CHECK: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
; R600-CHECK: 24
; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
; R600-CHECK: 24
; SI-CHECK: BUFFER_LOAD_SBYTE v{{[0-9]+}},
define void @load_const_i8_sext(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
  %0 = load i8 addrspace(2)* %in
  %1 = sext i8 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load an aligned i8 value
; FUNC-LABEL: @load_const_i8_aligned
; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
define void @load_const_i8_aligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
  %0 = load i8 addrspace(2)* %in
  %1 = zext i8 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load an un-aligned i8 value
; The address is %in advanced by one i8 element.
; FUNC-LABEL: @load_const_i8_unaligned
; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
define void @load_const_i8_unaligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
  %0 = getelementptr i8 addrspace(2)* %in, i32 1
  %1 = load i8 addrspace(2)* %0
  %2 = zext i8 %1 to i32
  store i32 %2, i32 addrspace(1)* %out
  ret void
}

; Load a sign-extended i16 value
; FUNC-LABEL: @load_const_i16_sext
; R600-CHECK: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
; R600-CHECK: 16
; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
; R600-CHECK: 16
; SI-CHECK: BUFFER_LOAD_SSHORT
define void @load_const_i16_sext(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
  %0 = load i16 addrspace(2)* %in
  %1 = sext i16 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load an aligned i16 value
; FUNC-LABEL: @load_const_i16_aligned
; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI-CHECK: BUFFER_LOAD_USHORT
define void @load_const_i16_aligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
  %0 = load i16 addrspace(2)* %in
  %1 = zext i16 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load an un-aligned i16 value
; The address is %in advanced by one i16 element.
; FUNC-LABEL: @load_const_i16_unaligned
; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI-CHECK: BUFFER_LOAD_USHORT
define void @load_const_i16_unaligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
  %0 = getelementptr i16 addrspace(2)* %in, i32 1
  %1 = load i16 addrspace(2)* %0
  %2 = zext i16 %1 to i32
  store i32 %2, i32 addrspace(1)* %out
  ret void
}

; Load an i32 value from the constant address space.
; FUNC-LABEL: @load_const_addrspace_i32
; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0

; SI-CHECK: S_LOAD_DWORD s{{[0-9]+}}
define void @load_const_addrspace_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %0 = load i32 addrspace(2)* %in
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; Load an f32 value from the constant address space.
; FUNC-LABEL: @load_const_addrspace_f32
; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0

; SI-CHECK: S_LOAD_DWORD s{{[0-9]+}}
define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) {
  %1 = load float addrspace(2)* %in
  store float %1, float addrspace(1)* %out
  ret void
}

;===------------------------------------------------------------------------===;
; LOCAL ADDRESS SPACE
;===------------------------------------------------------------------------===;

; Load an i8 value from the local address space.
; The loaded byte is zero-extended to i32. On SI the output must not contain
; S_WQM_B64, and an S_MOV_B32 to m0 is expected before the DS read.
; FUNC-LABEL: @load_i8_local
; R600-CHECK: LDS_UBYTE_READ_RET
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U8
define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
  %1 = load i8 addrspace(3)* %in
  %2 = zext i8 %1 to i32
  store i32 %2, i32 addrspace(1)* %out
  ret void
}

; Load an i8 value from the local address space and sign-extend it to i32.
; R600 is expected to follow the unsigned byte read with an ASHR.
; FUNC-LABEL: @load_i8_sext_local
; R600-CHECK: LDS_UBYTE_READ_RET
; R600-CHECK: ASHR
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I8
define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
entry:
  %0 = load i8 addrspace(3)* %in
  %1 = sext i8 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load a <2 x i8> vector from the local address space and zero-extend it to
; <2 x i32>. One byte read per element is expected.
; FUNC-LABEL: @load_v2i8_local
; R600-CHECK: LDS_UBYTE_READ_RET
; R600-CHECK: LDS_UBYTE_READ_RET
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U8
; SI-CHECK: DS_READ_U8
define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
entry:
  %0 = load <2 x i8> addrspace(3)* %in
  %1 = zext <2 x i8> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <2 x i8> vector from the local address space and sign-extend it to
; <2 x i32>. The R600 patterns are order-independent (DAG): one byte read and
; one ASHR per element.
; FUNC-LABEL: @load_v2i8_sext_local
; R600-CHECK-DAG: LDS_UBYTE_READ_RET
; R600-CHECK-DAG: LDS_UBYTE_READ_RET
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I8
; SI-CHECK: DS_READ_I8
define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
entry:
  %0 = load <2 x i8> addrspace(3)* %in
  %1 = sext <2 x i8> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i8> vector from the local address space and zero-extend it to
; <4 x i32>. One byte read per element is expected.
; FUNC-LABEL: @load_v4i8_local
; R600-CHECK: LDS_UBYTE_READ_RET
; R600-CHECK: LDS_UBYTE_READ_RET
; R600-CHECK: LDS_UBYTE_READ_RET
; R600-CHECK: LDS_UBYTE_READ_RET
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U8
; SI-CHECK: DS_READ_U8
; SI-CHECK: DS_READ_U8
; SI-CHECK: DS_READ_U8
define void @load_v4i8_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
entry:
  %0 = load <4 x i8> addrspace(3)* %in
  %1 = zext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i8> vector from the local address space and sign-extend it to
; <4 x i32>. The R600 patterns are order-independent (DAG): one byte read and
; one ASHR per element.
; FUNC-LABEL: @load_v4i8_sext_local
; R600-CHECK-DAG: LDS_UBYTE_READ_RET
; R600-CHECK-DAG: LDS_UBYTE_READ_RET
; R600-CHECK-DAG: LDS_UBYTE_READ_RET
; R600-CHECK-DAG: LDS_UBYTE_READ_RET
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I8
; SI-CHECK: DS_READ_I8
; SI-CHECK: DS_READ_I8
; SI-CHECK: DS_READ_I8
define void @load_v4i8_sext_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
entry:
  %0 = load <4 x i8> addrspace(3)* %in
  %1 = sext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}

; Load an i16 value from the local address space.
; The loaded halfword is zero-extended to i32. On SI the output must not
; contain S_WQM_B64, and an S_MOV_B32 to m0 is expected before the DS read.
; FUNC-LABEL: @load_i16_local
; R600-CHECK: LDS_USHORT_READ_RET
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U16
define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
entry:
  %0 = load i16 addrspace(3)* %in
  %1 = zext i16 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load an i16 value from the local address space and sign-extend it to i32.
; R600 is expected to follow the unsigned 16-bit read with an ASHR.
; FUNC-LABEL: @load_i16_sext_local
; R600-CHECK: LDS_USHORT_READ_RET
; R600-CHECK: ASHR
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I16
define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
entry:
  %0 = load i16 addrspace(3)* %in
  %1 = sext i16 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load a <2 x i16> vector from the local address space and zero-extend it to
; <2 x i32>. One 16-bit read per element is expected.
; FUNC-LABEL: @load_v2i16_local
; R600-CHECK: LDS_USHORT_READ_RET
; R600-CHECK: LDS_USHORT_READ_RET
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U16
; SI-CHECK: DS_READ_U16
define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
entry:
  %0 = load <2 x i16> addrspace(3)* %in
  %1 = zext <2 x i16> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <2 x i16> vector from the local address space and sign-extend it to
; <2 x i32>. The R600 patterns are order-independent (DAG): one 16-bit read
; and one ASHR per element.
; FUNC-LABEL: @load_v2i16_sext_local
; R600-CHECK-DAG: LDS_USHORT_READ_RET
; R600-CHECK-DAG: LDS_USHORT_READ_RET
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I16
; SI-CHECK: DS_READ_I16
define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
entry:
  %0 = load <2 x i16> addrspace(3)* %in
  %1 = sext <2 x i16> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i16> vector from the local address space and zero-extend it to
; <4 x i32>. One 16-bit read per element is expected.
; FUNC-LABEL: @load_v4i16_local
; R600-CHECK: LDS_USHORT_READ_RET
; R600-CHECK: LDS_USHORT_READ_RET
; R600-CHECK: LDS_USHORT_READ_RET
; R600-CHECK: LDS_USHORT_READ_RET
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U16
; SI-CHECK: DS_READ_U16
; SI-CHECK: DS_READ_U16
; SI-CHECK: DS_READ_U16
define void @load_v4i16_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
entry:
  %0 = load <4 x i16> addrspace(3)* %in
  %1 = zext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i16> vector from the local address space and sign-extend it to
; <4 x i32>. The R600 patterns are order-independent (DAG): one 16-bit read
; and one ASHR per element.
; FUNC-LABEL: @load_v4i16_sext_local
; R600-CHECK-DAG: LDS_USHORT_READ_RET
; R600-CHECK-DAG: LDS_USHORT_READ_RET
; R600-CHECK-DAG: LDS_USHORT_READ_RET
; R600-CHECK-DAG: LDS_USHORT_READ_RET
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I16
; SI-CHECK: DS_READ_I16
; SI-CHECK: DS_READ_I16
; SI-CHECK: DS_READ_I16
define void @load_v4i16_sext_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
entry:
  %0 = load <4 x i16> addrspace(3)* %in
  %1 = sext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}

; Load an i32 value from the local address space.
; FUNC-LABEL: @load_i32_local
; R600-CHECK: LDS_READ_RET
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_B32
define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
  %0 = load i32 addrspace(3)* %in
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; Load an f32 value from the local address space.
; FUNC-LABEL: @load_f32_local
; R600-CHECK: LDS_READ_RET
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_B32
define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
entry:
  %0 = load float addrspace(3)* %in
  store float %0, float addrspace(1)* %out
  ret void
}

; Load a v2f32 value from the local address space.
; R600 is expected to issue two LDS reads; SI a single DS_READ_B64.
; FUNC-LABEL: @load_v2f32_local
; R600-CHECK: LDS_READ_RET
; R600-CHECK: LDS_READ_RET
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_B64
define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
entry:
  %0 = load <2 x float> addrspace(3)* %in
  store <2 x float> %0, <2 x float> addrspace(1)* %out
  ret void
}