; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; This test just checks that the compiler doesn't crash.

; Bitcast through pointee types: load/store of <16 x i8> via an i8* input.
define amdgpu_kernel void @i8ptr_v16i8ptr(<16 x i8> addrspace(1)* %out, i8 addrspace(1)* %in) {
; EG-LABEL: i8ptr_v16i8ptr:
; EG:       ; %bb.0: ; %entry
; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:     MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
entry:
  %0 = bitcast i8 addrspace(1)* %in to <16 x i8> addrspace(1)*
  %1 = load <16 x i8>, <16 x i8> addrspace(1)* %0
  store <16 x i8> %1, <16 x i8> addrspace(1)* %out
  ret void
}

; Scalar float reinterpreted as a <2 x i16> — should be a plain 32-bit copy.
define amdgpu_kernel void @f32_to_v2i16(<2 x i16> addrspace(1)* %out, float addrspace(1)* %in) nounwind {
; EG-LABEL: f32_to_v2i16:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:     VTX_READ_32 T0.X, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:     MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %load = load float, float addrspace(1)* %in, align 4
  %bc = bitcast float %load to <2 x i16>
  store <2 x i16> %bc, <2 x i16> addrspace(1)* %out, align 4
  ret void
}

; Inverse of the above: <2 x i16> reinterpreted as float.
define amdgpu_kernel void @v2i16_to_f32(float addrspace(1)* %out, <2 x i16> addrspace(1)* %in) nounwind {
; EG-LABEL: v2i16_to_f32:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:     VTX_READ_32 T0.X, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:     MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in, align 4
  %bc = bitcast <2 x i16> %load to float
  store float %bc, float addrspace(1)* %out, align 4
  ret void
}

; <4 x i8> reinterpreted as i32 — again a straight 32-bit copy.
define amdgpu_kernel void @v4i8_to_i32(i32 addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
; EG-LABEL: v4i8_to_i32:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:     VTX_READ_32 T0.X, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:     MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
  %bc = bitcast <4 x i8> %load to i32
  store i32 %bc, i32 addrspace(1)* %out, align 4
  ret void
}

; i32 reinterpreted as <4 x i8>.
define amdgpu_kernel void @i32_to_v4i8(<4 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
; EG-LABEL: i32_to_v4i8:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:     VTX_READ_32 T0.X, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:     MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %load = load i32, i32 addrspace(1)* %in, align 4
  %bc = bitcast i32 %load to <4 x i8>
  store <4 x i8> %bc, <4 x i8> addrspace(1)* %out, align 4
  ret void
}

; Vector-to-vector reinterpret with a different element width.
define amdgpu_kernel void @v2i16_to_v4i8(<4 x i8> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) nounwind {
; EG-LABEL: v2i16_to_v4i8:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:     VTX_READ_32 T0.X, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:     MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in, align 4
  %bc = bitcast <2 x i16> %load to <4 x i8>
  store <4 x i8> %bc, <4 x i8> addrspace(1)* %out, align 4
  ret void
}

; This just checks for crash in BUILD_VECTOR/EXTRACT_ELEMENT combine
; the stack manipulation is tricky to follow
; TODO: This should only use one load
define amdgpu_kernel void @v4i16_extract_i8(i8 addrspace(1)* %out, <4 x i16> addrspace(1)* %in) nounwind {
; EG-LABEL: v4i16_extract_i8:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 1 @6
; EG-NEXT:    ALU 17, @11, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT MSKOR T5.XW, T6.X
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:     VTX_READ_16 T6.X, T5.X, 6, #1
; EG-NEXT:     VTX_READ_16 T5.X, T5.X, 4, #1
; EG-NEXT:    ALU clause starting at 10:
; EG-NEXT:     MOV * T5.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 11:
; EG-NEXT:     LSHL * T0.W, T6.X, literal.x,
; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT:     OR_INT * T0.W, PV.W, T5.X,
; EG-NEXT:     MOV * T3.X, PV.W,
; EG-NEXT:     MOV T0.Y, PV.X,
; EG-NEXT:     AND_INT T0.W, KC0[2].Y, literal.x,
; EG-NEXT:     MOV * T1.W, literal.y,
; EG-NEXT:    3(4.203895e-45), 8(1.121039e-44)
; EG-NEXT:     BFE_UINT T1.W, PV.Y, literal.x, PS,
; EG-NEXT:     LSHL * T0.W, PV.W, literal.y,
; EG-NEXT:    8(1.121039e-44), 3(4.203895e-45)
; EG-NEXT:     LSHL T5.X, PV.W, PS,
; EG-NEXT:     LSHL * T5.W, literal.x, PS,
; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
; EG-NEXT:     MOV T5.Y, 0.0,
; EG-NEXT:     MOV * T5.Z, 0.0,
; EG-NEXT:    LSHR * T6.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in, align 2
  %bc = bitcast <4 x i16> %load to <8 x i8>
  %element = extractelement <8 x i8> %bc, i32 5
  store i8 %element, i8 addrspace(1)* %out
  ret void
}

; 64-bit reinterpret: <2 x i32> to double, a straight XY copy.
define amdgpu_kernel void @bitcast_v2i32_to_f64(double addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
; EG-LABEL: bitcast_v2i32_to_f64:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:     VTX_READ_64 T0.XY, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:     MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %val = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 8
  %bc = bitcast <2 x i32> %val to double
  store double %bc, double addrspace(1)* %out, align 8
  ret void
}