; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
;
; Exercises instcombine on the x86 SSE scalar (*.ss) intrinsics declared at
; the end of this file. Each test builds its <4 x float> operands with
; insertelement (constants in lanes 1-3) and the expected output records which
; inserts and calls are supposed to survive.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

; rcp.ss, lane 0 extracted: the constant inserts into lanes 1-3 of the operand
; are expected to be removed; the insert of %a and the call remain.
define float @test_rcp_ss_0(float %a) {
; CHECK-LABEL: @test_rcp_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; CHECK-NEXT:    ret float [[TMP3]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
  %6 = extractelement <4 x float> %5, i32 0
  ret float %6
}

; rcp.ss, lane 1 extracted: expected to fold to 1.0, the constant placed in
; lane 1 of the operand, with the call removed.
define float @test_rcp_ss_1(float %a) {
; CHECK-LABEL: @test_rcp_ss_1(
; CHECK-NEXT:    ret float 1.000000e+00
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
  %6 = extractelement <4 x float> %5, i32 1
  ret float %6
}

; sqrt.ss, lane 0 extracted: the constant inserts into lanes 1-3 of the operand
; are expected to be removed; the insert of %a and the call remain.
define float @test_sqrt_ss_0(float %a) {
; CHECK-LABEL: @test_sqrt_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; CHECK-NEXT:    ret float [[TMP3]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %4)
  %6 = extractelement <4 x float> %5, i32 0
  ret float %6
}

; sqrt.ss, lane 2 extracted: expected to fold to 2.0, the constant placed in
; lane 2 of the operand, with the call removed.
define float @test_sqrt_ss_2(float %a) {
; CHECK-LABEL: @test_sqrt_ss_2(
; CHECK-NEXT:    ret float 2.000000e+00
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %4)
  %6 = extractelement <4 x float> %5, i32 2
  ret float %6
}

; rsqrt.ss, lane 0 extracted: the constant inserts into lanes 1-3 of the
; operand are expected to be removed; the insert of %a and the call remain.
define float @test_rsqrt_ss_0(float %a) {
; CHECK-LABEL: @test_rsqrt_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; CHECK-NEXT:    ret float [[TMP3]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
  %6 = extractelement <4 x float> %5, i32 0
  ret float %6
}

; rsqrt.ss, lane 3 extracted: expected to fold to 3.0, the constant placed in
; lane 3 of the operand, with the call removed.
define float @test_rsqrt_ss_3(float %a) {
; CHECK-LABEL: @test_rsqrt_ss_3(
; CHECK-NEXT:    ret float 3.000000e+00
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
  %6 = extractelement <4 x float> %5, i32 3
  ret float %6
}

; add.ss, whole vector returned: the constant inserts into lanes 1-3 of the
; second operand are expected to be removed so the call takes %b directly.
define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_add_ss(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a, <4 x float> %b)
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
  %4 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a, <4 x float> %3)
  ret <4 x float> %4
}

; add.ss, lane 0 extracted from operands built out of scalars: expected to
; become a plain scalar fadd of %a and %b.
define float @test_add_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_add_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = fadd float %a, %b
; CHECK-NEXT:    ret float [[TMP1]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %4, <4 x float> %8)
  %r = extractelement <4 x float> %9, i32 0
  ret float %r
}

; add.ss, lane 1 extracted: expected to fold to 1.0, the constant placed in
; lane 1 of the first operand, with the call removed.
define float @test_add_ss_1(float %a, float %b) {
; CHECK-LABEL: @test_add_ss_1(
; CHECK-NEXT:    ret float 1.000000e+00
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %4, <4 x float> %5)
  %7 = extractelement <4 x float> %6, i32 1
  ret float %7
}

; sub.ss, whole vector returned: the constant inserts into lanes 1-3 of the
; second operand are expected to be removed so the call takes %b directly.
define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_sub_ss(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a, <4 x float> %b)
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
  %4 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a, <4 x float> %3)
  ret <4 x float> %4
}

; sub.ss, lane 0 extracted from operands built out of scalars: expected to
; become a plain scalar fsub of %a and %b.
define float @test_sub_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_sub_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = fsub float %a, %b
; CHECK-NEXT:    ret float [[TMP1]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %4, <4 x float> %8)
  %r = extractelement <4 x float> %9, i32 0
  ret float %r
}

; sub.ss, lane 2 extracted: expected to fold to 2.0, the constant placed in
; lane 2 of the first operand, with the call removed.
define float @test_sub_ss_2(float %a, float %b) {
; CHECK-LABEL: @test_sub_ss_2(
; CHECK-NEXT:    ret float 2.000000e+00
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %4, <4 x float> %5)
  %7 = extractelement <4 x float> %6, i32 2
  ret float %7
}

; mul.ss, whole vector returned: the constant inserts into lanes 1-3 of the
; second operand are expected to be removed so the call takes %b directly.
define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_mul_ss(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a, <4 x float> %b)
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
  %4 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a, <4 x float> %3)
  ret <4 x float> %4
}

; mul.ss, lane 0 extracted from operands built out of scalars: expected to
; become a plain scalar fmul of %a and %b.
define float @test_mul_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_mul_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = fmul float %a, %b
; CHECK-NEXT:    ret float [[TMP1]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %4, <4 x float> %8)
  %r = extractelement <4 x float> %9, i32 0
  ret float %r
}

; mul.ss, lane 3 extracted: expected to fold to 3.0, the constant placed in
; lane 3 of the first operand, with the call removed.
define float @test_mul_ss_3(float %a, float %b) {
; CHECK-LABEL: @test_mul_ss_3(
; CHECK-NEXT:    ret float 3.000000e+00
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %4, <4 x float> %5)
  %7 = extractelement <4 x float> %6, i32 3
  ret float %7
}

; div.ss, whole vector returned: the constant inserts into lanes 1-3 of the
; second operand are expected to be removed so the call takes %b directly.
define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_div_ss(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a, <4 x float> %b)
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
  %4 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a, <4 x float> %3)
  ret <4 x float> %4
}

; div.ss, lane 0 extracted from operands built out of scalars: expected to
; become a plain scalar fdiv of %a by %b.
define float @test_div_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_div_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = fdiv float %a, %b
; CHECK-NEXT:    ret float [[TMP1]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %4, <4 x float> %8)
  %r = extractelement <4 x float> %9, i32 0
  ret float %r
}

; div.ss, lane 1 extracted: expected to fold to 1.0, the constant placed in
; lane 1 of the first operand, with the call removed.
define float @test_div_ss_1(float %a, float %b) {
; CHECK-LABEL: @test_div_ss_1(
; CHECK-NEXT:    ret float 1.000000e+00
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %4, <4 x float> %5)
  %7 = extractelement <4 x float> %6, i32 1
  ret float %7
}

; min.ss, whole vector returned: the constant inserts into lanes 1-3 of the
; second operand are expected to be removed so the call takes %b directly.
define <4 x float> @test_min_ss(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_min_ss(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a, <4 x float> %b)
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
  %4 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a, <4 x float> %3)
  ret <4 x float> %4
}

; min.ss, lane 0 extracted: the intrinsic call is expected to stay, but the
; constant inserts into lanes 1-3 of both operands are expected to be removed.
define float @test_min_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_min_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
; CHECK-NEXT:    ret float [[TMP4]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %4, <4 x float> %8)
  %10 = extractelement <4 x float> %9, i32 0
  ret float %10
}

; min.ss, lane 2 extracted: expected to fold to 2.0, the constant placed in
; lane 2 of the first operand, with the call removed.
define float @test_min_ss_2(float %a, float %b) {
; CHECK-LABEL: @test_min_ss_2(
; CHECK-NEXT:    ret float 2.000000e+00
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %4, <4 x float> %5)
  %7 = extractelement <4 x float> %6, i32 2
  ret float %7
}

; max.ss, whole vector returned: the constant inserts into lanes 1-3 of the
; second operand are expected to be removed so the call takes %b directly.
define <4 x float> @test_max_ss(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_max_ss(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a, <4 x float> %b)
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
  %4 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a, <4 x float> %3)
  ret <4 x float> %4
}

; max.ss, lane 0 extracted: the intrinsic call is expected to stay, but the
; constant inserts into lanes 1-3 of both operands are expected to be removed.
define float @test_max_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_max_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
; CHECK-NEXT:    ret float [[TMP4]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %4, <4 x float> %8)
  %10 = extractelement <4 x float> %9, i32 0
  ret float %10
}

; max.ss, lane 3 extracted: expected to fold to 3.0, the constant placed in
; lane 3 of the first operand, with the call removed.
define float @test_max_ss_3(float %a, float %b) {
; CHECK-LABEL: @test_max_ss_3(
; CHECK-NEXT:    ret float 3.000000e+00
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %4, <4 x float> %5)
  %7 = extractelement <4 x float> %6, i32 3
  ret float %7
}

; cmp.ss (predicate i8 0), whole vector returned: the constant inserts into
; lanes 1-3 of the second operand are expected to be removed so the call takes
; %b directly.
define <4 x float> @test_cmp_ss(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_cmp_ss(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a, <4 x float> %b, i8 0)
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
  %4 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a, <4 x float> %3, i8 0)
  ret <4 x float> %4
}

; cmp.ss (predicate i8 0), lane 0 extracted: the intrinsic call is expected to
; stay, but the constant inserts into lanes 1-3 of both operands are expected
; to be removed.
define float @test_cmp_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_cmp_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]], i8 0)
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
; CHECK-NEXT:    ret float [[R]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %4, <4 x float> %8, i8 0)
  %r = extractelement <4 x float> %9, i32 0
  ret float %r
}

; cmp.ss (predicate i8 0), lane 1 extracted: expected to fold to 1.0, the
; constant placed in lane 1 of the first operand, with the call removed.
define float @test_cmp_ss_1(float %a, float %b) {
; CHECK-LABEL: @test_cmp_ss_1(
; CHECK-NEXT:    ret float 1.000000e+00
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %4, <4 x float> %5, i8 0)
  %7 = extractelement <4 x float> %6, i32 1
  ret float %7
}

; comieq.ss (i32 result): the constant inserts into lanes 1-3 of both operands
; are expected to be removed; the lane-0 inserts and the call remain.
define i32 @test_comieq_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_comieq_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> %4, <4 x float> %8)
  ret i32 %9
}

; comige.ss (i32 result): the constant inserts into lanes 1-3 of both operands
; are expected to be removed; the lane-0 inserts and the call remain.
define i32 @test_comige_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_comige_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comige.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call i32 @llvm.x86.sse.comige.ss(<4 x float> %4, <4 x float> %8)
  ret i32 %9
}

; comigt.ss (i32 result): the constant inserts into lanes 1-3 of both operands
; are expected to be removed; the lane-0 inserts and the call remain.
define i32 @test_comigt_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_comigt_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comigt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call i32 @llvm.x86.sse.comigt.ss(<4 x float> %4, <4 x float> %8)
  ret i32 %9
}

; comile.ss (i32 result): the constant inserts into lanes 1-3 of both operands
; are expected to be removed; the lane-0 inserts and the call remain.
define i32 @test_comile_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_comile_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comile.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call i32 @llvm.x86.sse.comile.ss(<4 x float> %4, <4 x float> %8)
  ret i32 %9
}

; comilt.ss (i32 result): the constant inserts into lanes 1-3 of both operands
; are expected to be removed; the lane-0 inserts and the call remain.
define i32 @test_comilt_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_comilt_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comilt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call i32 @llvm.x86.sse.comilt.ss(<4 x float> %4, <4 x float> %8)
  ret i32 %9
}

; comineq.ss (i32 result): the constant inserts into lanes 1-3 of both operands
; are expected to be removed; the lane-0 inserts and the call remain.
define i32 @test_comineq_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_comineq_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> %4, <4 x float> %8)
  ret i32 %9
}

; ucomieq.ss (i32 result): the constant inserts into lanes 1-3 of both operands
; are expected to be removed; the lane-0 inserts and the call remain.
define i32 @test_ucomieq_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_ucomieq_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %4, <4 x float> %8)
  ret i32 %9
}

; ucomige.ss (i32 result): the constant inserts into lanes 1-3 of both operands
; are expected to be removed; the lane-0 inserts and the call remain.
define i32 @test_ucomige_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_ucomige_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomige.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %4, <4 x float> %8)
  ret i32 %9
}

; ucomigt.ss (i32 result): the constant inserts into lanes 1-3 of both operands
; are expected to be removed; the lane-0 inserts and the call remain.
define i32 @test_ucomigt_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_ucomigt_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %4, <4 x float> %8)
  ret i32 %9
}

; ucomile.ss (i32 result): the constant inserts into lanes 1-3 of both operands
; are expected to be removed; the lane-0 inserts and the call remain.
define i32 @test_ucomile_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_ucomile_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomile.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %4, <4 x float> %8)
  ret i32 %9
}

; ucomilt.ss (i32 result): the constant inserts into lanes 1-3 of both operands
; are expected to be removed; the lane-0 inserts and the call remain.
define i32 @test_ucomilt_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_ucomilt_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %4, <4 x float> %8)
  ret i32 %9
}

; ucomineq.ss (i32 result): the constant inserts into lanes 1-3 of both
; operands are expected to be removed; the lane-0 inserts and the call remain.
define i32 @test_ucomineq_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_ucomineq_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %4, <4 x float> %8)
  ret i32 %9
}

; Single-operand intrinsics used by the tests above.
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>)
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>)
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>)

; Two-operand intrinsics returning <4 x float>; cmp.ss takes an extra i8
; argument.
declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8)

; Two-operand comparison intrinsics returning i32.
declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>)
; Declarations of the remaining comi*.ss intrinsics called by the tests in this
; file; each takes two <4 x float> operands and returns i32.
declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>)

; Declarations of the ucomi*.ss intrinsics called by the tests in this file;
; same signature as the comi*.ss group.
declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>)