; RUN: llc < %s -mtriple=i386-linux-gnu -o - | FileCheck %s

; This test checks that only a single js gets generated in the final code
; for lowering the CMOV pseudos that get created for this IR.
; Both i32 selects below are controlled by the same %cmp (signed %v1 < 0).
; CHECK-LABEL: foo1:
; CHECK: js
; CHECK-NOT: js
define i32 @foo1(i32 %v1, i32 %v2, i32 %v3) nounwind {
entry:
  %cmp = icmp slt i32 %v1, 0
  %v2.v3 = select i1 %cmp, i32 %v2, i32 %v3
  %v1.v2 = select i1 %cmp, i32 %v1, i32 %v2
  %sub = sub i32 %v1.v2, %v2.v3
  ret i32 %sub
}

; This test checks that only a single js gets generated in the final code
; for lowering the CMOV pseudos that get created for this IR. This makes
; sure the code for the lowering for opposite conditions gets tested.
; %cmp2 (sge) is the inverse of %cmp1 (slt) on the same operand, so neither
; a second js nor a jns is expected.
; CHECK-LABEL: foo11:
; CHECK: js
; CHECK-NOT: js
; CHECK-NOT: jns
define i32 @foo11(i32 %v1, i32 %v2, i32 %v3) nounwind {
entry:
  %cmp1 = icmp slt i32 %v1, 0
  %v2.v3 = select i1 %cmp1, i32 %v2, i32 %v3
  %cmp2 = icmp sge i32 %v1, 0
  %v1.v2 = select i1 %cmp2, i32 %v1, i32 %v2
  %sub = sub i32 %v1.v2, %v2.v3
  ret i32 %sub
}

; This test checks that only a single js gets generated in the final code
; for lowering the CMOV pseudos that get created for this IR.
; Same shape as the i32 case, but the selects operate on i8 values that are
; sign-extended to i32 afterwards.
; CHECK-LABEL: foo2:
; CHECK: js
; CHECK-NOT: js
define i32 @foo2(i8 %v1, i8 %v2, i8 %v3) nounwind {
entry:
  %cmp = icmp slt i8 %v1, 0
  %v2.v3 = select i1 %cmp, i8 %v2, i8 %v3
  %v1.v2 = select i1 %cmp, i8 %v1, i8 %v2
  %t1 = sext i8 %v2.v3 to i32
  %t2 = sext i8 %v1.v2 to i32
  %sub = sub i32 %t1, %t2
  ret i32 %sub
}

; This test checks that only a single js gets generated in the final code
; for lowering the CMOV pseudos that get created for this IR.
; CHECK-LABEL: foo3:
; CHECK: js
; CHECK-NOT: js
; The two selects operate on i16 values that share %cmp and are sign-extended
; to i32 afterwards.
define i32 @foo3(i16 %v1, i16 %v2, i16 %v3) nounwind {
entry:
  %cmp = icmp slt i16 %v1, 0
  %v2.v3 = select i1 %cmp, i16 %v2, i16 %v3
  %v1.v2 = select i1 %cmp, i16 %v1, i16 %v2
  %t1 = sext i16 %v2.v3 to i32
  %t2 = sext i16 %v1.v2 to i32
  %sub = sub i32 %t1, %t2
  ret i32 %sub
}

; This test checks that only a single js gets generated in the final code
; for lowering the CMOV pseudos that get created for this IR.
; Two float selects share the integer condition %cmp; %v3 is the false
; operand of the first select and the true operand of the second.
; CHECK-LABEL: foo4:
; CHECK: js
; CHECK-NOT: js
define float @foo4(i32 %v1, float %v2, float %v3, float %v4) nounwind {
entry:
  %cmp = icmp slt i32 %v1, 0
  %t1 = select i1 %cmp, float %v2, float %v3
  %t2 = select i1 %cmp, float %v3, float %v4
  %sub = fsub float %t1, %t2
  ret float %sub
}

; This test checks that only a single je gets generated in the final code
; for lowering the CMOV pseudos that get created for this IR.
; Same pattern with double operands and an equality compare against zero.
; CHECK-LABEL: foo5:
; CHECK: je
; CHECK-NOT: je
define double @foo5(i32 %v1, double %v2, double %v3, double %v4) nounwind {
entry:
  %cmp = icmp eq i32 %v1, 0
  %t1 = select i1 %cmp, double %v2, double %v3
  %t2 = select i1 %cmp, double %v3, double %v4
  %sub = fsub double %t1, %t2
  ret double %sub
}

; This test checks that only a single je gets generated in the final code
; for lowering the CMOV pseudos that get created for this IR.
; Same pattern with <4 x float> operands.
; CHECK-LABEL: foo6:
; CHECK: je
; CHECK-NOT: je
define <4 x float> @foo6(i32 %v1, <4 x float> %v2, <4 x float> %v3, <4 x float> %v4) nounwind {
entry:
  %cmp = icmp eq i32 %v1, 0
  %t1 = select i1 %cmp, <4 x float> %v2, <4 x float> %v3
  %t2 = select i1 %cmp, <4 x float> %v3, <4 x float> %v4
  %sub = fsub <4 x float> %t1, %t2
  ret <4 x float> %sub
}

; This test checks that only a single je gets generated in the final code
; for lowering the CMOV pseudos that get created for this IR.
; CHECK-LABEL: foo7:
; CHECK: je
; CHECK-NOT: je
; Two <2 x double> selects share %cmp (%v1 == 0); %v3 is the false operand of
; the first select and the true operand of the second.
define <2 x double> @foo7(i32 %v1, <2 x double> %v2, <2 x double> %v3, <2 x double> %v4) nounwind {
entry:
  %cmp = icmp eq i32 %v1, 0
  %t1 = select i1 %cmp, <2 x double> %v2, <2 x double> %v3
  %t2 = select i1 %cmp, <2 x double> %v3, <2 x double> %v4
  %sub = fsub <2 x double> %t1, %t2
  ret <2 x double> %sub
}

; This test checks that only a single ja gets generated in the final code
; for lowering the CMOV pseudos that get created for this IR. This combines
; all the supported types together into one long string of selects based
; on the same condition.
;
; NOTE(review): the i8 arguments %v2 and %v3 are never used in the body, so
; no i8 select is exercised here - confirm whether that is intentional.
; CHECK-LABEL: foo8:
; CHECK: ja
; CHECK-NOT: ja
define void @foo8(i32 %v1,
                  i8 %v2, i8 %v3,
                  i16 %v12, i16 %v13,
                  i32 %v22, i32 %v23,
                  float %v32, float %v33,
                  double %v42, double %v43,
                  <4 x float> %v52, <4 x float> %v53,
                  <2 x double> %v62, <2 x double> %v63,
                  <8 x float> %v72, <8 x float> %v73,
                  <4 x double> %v82, <4 x double> %v83,
                  <16 x float> %v92, <16 x float> %v93,
                  <8 x double> %v102, <8 x double> %v103,
                  i8 * %dst) nounwind {
entry:
  ; One typed destination pointer per select result, each at a byte offset
  ; from %dst.
  %add.ptr11 = getelementptr inbounds i8, i8* %dst, i32 2
  %a11 = bitcast i8* %add.ptr11 to i16*

  %add.ptr21 = getelementptr inbounds i8, i8* %dst, i32 4
  %a21 = bitcast i8* %add.ptr21 to i32*

  %add.ptr31 = getelementptr inbounds i8, i8* %dst, i32 8
  %a31 = bitcast i8* %add.ptr31 to float*

  %add.ptr41 = getelementptr inbounds i8, i8* %dst, i32 16
  %a41 = bitcast i8* %add.ptr41 to double*

  %add.ptr51 = getelementptr inbounds i8, i8* %dst, i32 32
  %a51 = bitcast i8* %add.ptr51 to <4 x float>*

  %add.ptr61 = getelementptr inbounds i8, i8* %dst, i32 48
  %a61 = bitcast i8* %add.ptr61 to <2 x double>*

  %add.ptr71 = getelementptr inbounds i8, i8* %dst, i32 64
  %a71 = bitcast i8* %add.ptr71 to <8 x float>*

  %add.ptr81 = getelementptr inbounds i8, i8* %dst, i32 128
  %a81 = bitcast i8* %add.ptr81 to <4 x double>*

  ; NOTE(review): offsets 64 and 128 are reused below, so the <16 x float> and
  ; <8 x double> stores overlap the <8 x float> and <4 x double> stores above.
  ; Left as-is because changing the addresses could change the generated code;
  ; confirm whether the overlap is intended.
  %add.ptr91 = getelementptr inbounds i8, i8* %dst, i32 64
  %a91 = bitcast i8* %add.ptr91 to <16 x float>*

  %add.ptr101 = getelementptr inbounds i8, i8* %dst, i32 128
  %a101 = bitcast i8* %add.ptr101 to <8 x double>*

  ; These operations are necessary, because select of two single use loads
  ; ends up getting optimized into a select of two leas, followed by a
  ; single load of the selected address.
  %t13 = xor i16 %v13, 11
  %t23 = xor i32 %v23, 1234
  %t33 = fadd float %v33, %v32
  %t43 = fadd double %v43, %v42
  %t53 = fadd <4 x float> %v53, %v52
  %t63 = fadd <2 x double> %v63, %v62
  %t73 = fsub <8 x float> %v73, %v72
  %t83 = fsub <4 x double> %v83, %v82
  %t93 = fsub <16 x float> %v93, %v92
  %t103 = fsub <8 x double> %v103, %v102

  ; Every select below uses the same unsigned compare (%v1 > 31) and the
  ; selects are emitted back to back.
  %cmp = icmp ugt i32 %v1, 31
  %t11 = select i1 %cmp, i16 %v12, i16 %t13
  %t21 = select i1 %cmp, i32 %v22, i32 %t23
  %t31 = select i1 %cmp, float %v32, float %t33
  %t41 = select i1 %cmp, double %v42, double %t43
  %t51 = select i1 %cmp, <4 x float> %v52, <4 x float> %t53
  %t61 = select i1 %cmp, <2 x double> %v62, <2 x double> %t63
  %t71 = select i1 %cmp, <8 x float> %v72, <8 x float> %t73
  %t81 = select i1 %cmp, <4 x double> %v82, <4 x double> %t83
  %t91 = select i1 %cmp, <16 x float> %v92, <16 x float> %t93
  %t101 = select i1 %cmp, <8 x double> %v102, <8 x double> %t103

  store i16 %t11, i16* %a11, align 2
  store i32 %t21, i32* %a21, align 4
  store float %t31, float* %a31, align 4
  store double %t41, double* %a41, align 8
  store <4 x float> %t51, <4 x float>* %a51, align 16
  store <2 x double> %t61, <2 x double>* %a61, align 16
  store <8 x float> %t71, <8 x float>* %a71, align 32
  store <4 x double> %t81, <4 x double>* %a81, align 32
  store <16 x float> %t91, <16 x float>* %a91, align 32
  store <8 x double> %t101, <8 x double>* %a101, align 32

  ret void
}

; This test checks that only a single ja gets generated in the final code
; for lowering the CMOV pseudos that get created for this IR. All of the
; selects are based on the same condition.
; Contrary to my expectations, this doesn't exercise the code for
; CMOV_V8I1, CMOV_V16I1, CMOV_V32I1, or CMOV_V64I1.  Instead the selects all
; get lowered into vector length number of selects, which all eventually turn
; into a huge number of CMOV_GR8, which are all contiguous, so the optimization
; kicks in as long as CMOV_GR8 is supported.  I couldn't find a way to get
; CMOV_V*I1 pseudo-opcodes to get generated.  If a way exists to get CMOV_V*I1
; pseudo-opcodes to be generated, this test should be replaced with one that
; tests those opcodes.
;
; CHECK-LABEL: foo9:
; CHECK: ja
; CHECK-NOT: ja
define void @foo9(i32 %v1,
                  <8 x i1> %v12, <8 x i1> %v13,
                  <16 x i1> %v22, <16 x i1> %v23,
                  <32 x i1> %v32, <32 x i1> %v33,
                  <64 x i1> %v42, <64 x i1> %v43,
                  i8 * %dst) nounwind {
entry:
  ; One typed destination pointer per select result, each at a byte offset
  ; from %dst.
  %add.ptr11 = getelementptr inbounds i8, i8* %dst, i32 0
  %a11 = bitcast i8* %add.ptr11 to <8 x i1>*

  %add.ptr21 = getelementptr inbounds i8, i8* %dst, i32 4
  %a21 = bitcast i8* %add.ptr21 to <16 x i1>*

  %add.ptr31 = getelementptr inbounds i8, i8* %dst, i32 8
  %a31 = bitcast i8* %add.ptr31 to <32 x i1>*

  %add.ptr41 = getelementptr inbounds i8, i8* %dst, i32 16
  %a41 = bitcast i8* %add.ptr41 to <64 x i1>*

  ; These operations are necessary, because select of two single use loads
  ; ends up getting optimized into a select of two leas, followed by a
  ; single load of the selected address.
  %t13 = xor <8 x i1> %v13, %v12
  %t23 = xor <16 x i1> %v23, %v22
  %t33 = xor <32 x i1> %v33, %v32
  %t43 = xor <64 x i1> %v43, %v42

  ; All four vector selects use the same unsigned compare (%v1 > 31).
  %cmp = icmp ugt i32 %v1, 31
  %t11 = select i1 %cmp, <8 x i1> %v12, <8 x i1> %t13
  %t21 = select i1 %cmp, <16 x i1> %v22, <16 x i1> %t23
  %t31 = select i1 %cmp, <32 x i1> %v32, <32 x i1> %t33
  %t41 = select i1 %cmp, <64 x i1> %v42, <64 x i1> %t43

  store <8 x i1> %t11, <8 x i1>* %a11, align 16
  store <16 x i1> %t21, <16 x i1>* %a21, align 4
  store <32 x i1> %t31, <32 x i1>* %a31, align 8
  store <64 x i1> %t41, <64 x i1>* %a41, align 16

  ret void
}