; RUN: llc -mtriple=x86_64-pc-linux -mcpu=corei7 < %s | FileCheck -check-prefix=CHECK -check-prefix=SSE2 %s
; RUN: llc -mtriple=x86_64-pc-linux -mattr=-sse4.1 -mcpu=corei7 < %s | FileCheck -check-prefix=CHECK -check-prefix=SSE2 %s
; RUN: llc -mtriple=x86_64-pc-linux -mcpu=corei7-avx < %s | FileCheck -check-prefix=CHECK -check-prefix=AVX %s

; Ensure that the backend selects SSE/AVX scalar fp instructions
; from a packed fp instruction plus a vector insert.
;
; Every function below performs a packed fp operation on %a and %b and then
; keeps only lane 0 of the result, taking all remaining lanes from the first
; operand of the arithmetic instruction. The expected output is a single
; scalar instruction with no extra scalar move between it and the return.
;
; Function label checks are anchored with a trailing colon so they match the
; emitted assembly label itself rather than an assembler directive that
; merely mentions the symbol name.


; --- Group 1: op(%a, %b), lane 0 inserted into %a through a shufflevector. ---
; Shuffle indices 5, 6, 7 (or 3 for the two-element case) pick the upper
; lanes from the second shuffle operand, which is %a.

define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
  %1 = fadd <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

; CHECK-LABEL: test_add_ss:
; SSE2: addss %xmm1, %xmm0
; AVX: vaddss %xmm1, %xmm0, %xmm0
; CHECK-NOT: movss
; CHECK: ret


define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
  %1 = fsub <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

; CHECK-LABEL: test_sub_ss:
; SSE2: subss %xmm1, %xmm0
; AVX: vsubss %xmm1, %xmm0, %xmm0
; CHECK-NOT: movss
; CHECK: ret


define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
  %1 = fmul <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

; CHECK-LABEL: test_mul_ss:
; SSE2: mulss %xmm1, %xmm0
; AVX: vmulss %xmm1, %xmm0, %xmm0
; CHECK-NOT: movss
; CHECK: ret


define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
  %1 = fdiv <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

; CHECK-LABEL: test_div_ss:
; SSE2: divss %xmm1, %xmm0
; AVX: vdivss %xmm1, %xmm0, %xmm0
; CHECK-NOT: movss
; CHECK: ret


define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
  %1 = fadd <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

; CHECK-LABEL: test_add_sd:
; SSE2: addsd %xmm1, %xmm0
; AVX: vaddsd %xmm1, %xmm0, %xmm0
; CHECK-NOT: movsd
; CHECK: ret


define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
  %1 = fsub <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

; CHECK-LABEL: test_sub_sd:
; SSE2: subsd %xmm1, %xmm0
; AVX: vsubsd %xmm1, %xmm0, %xmm0
; CHECK-NOT: movsd
; CHECK: ret


define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
  %1 = fmul <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

; CHECK-LABEL: test_mul_sd:
; SSE2: mulsd %xmm1, %xmm0
; AVX: vmulsd %xmm1, %xmm0, %xmm0
; CHECK-NOT: movsd
; CHECK: ret


define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
  %1 = fdiv <2 x double> %a, %b
  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

; CHECK-LABEL: test_div_sd:
; SSE2: divsd %xmm1, %xmm0
; AVX: vdivsd %xmm1, %xmm0, %xmm0
; CHECK-NOT: movsd
; CHECK: ret


; --- Group 2: op(%b, %a), lane 0 inserted into %b through a shufflevector. ---
; Same shape as group 1 with the operand roles swapped, so the expected
; scalar instruction reads %xmm0 as the source and %xmm1 as the other operand.

define <4 x float> @test2_add_ss(<4 x float> %a, <4 x float> %b) {
  %1 = fadd <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

; CHECK-LABEL: test2_add_ss:
; SSE2: addss %xmm0, %xmm1
; AVX: vaddss %xmm0, %xmm1, %xmm0
; CHECK-NOT: movss
; CHECK: ret


define <4 x float> @test2_sub_ss(<4 x float> %a, <4 x float> %b) {
  %1 = fsub <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

; CHECK-LABEL: test2_sub_ss:
; SSE2: subss %xmm0, %xmm1
; AVX: vsubss %xmm0, %xmm1, %xmm0
; CHECK-NOT: movss
; CHECK: ret


define <4 x float> @test2_mul_ss(<4 x float> %a, <4 x float> %b) {
  %1 = fmul <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

; CHECK-LABEL: test2_mul_ss:
; SSE2: mulss %xmm0, %xmm1
; AVX: vmulss %xmm0, %xmm1, %xmm0
; CHECK-NOT: movss
; CHECK: ret


define <4 x float> @test2_div_ss(<4 x float> %a, <4 x float> %b) {
  %1 = fdiv <4 x float> %b, %a
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %2
}

; CHECK-LABEL: test2_div_ss:
; SSE2: divss %xmm0, %xmm1
; AVX: vdivss %xmm0, %xmm1, %xmm0
; CHECK-NOT: movss
; CHECK: ret


define <2 x double> @test2_add_sd(<2 x double> %a, <2 x double> %b) {
  %1 = fadd <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

; CHECK-LABEL: test2_add_sd:
; SSE2: addsd %xmm0, %xmm1
; AVX: vaddsd %xmm0, %xmm1, %xmm0
; CHECK-NOT: movsd
; CHECK: ret


define <2 x double> @test2_sub_sd(<2 x double> %a, <2 x double> %b) {
  %1 = fsub <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

; CHECK-LABEL: test2_sub_sd:
; SSE2: subsd %xmm0, %xmm1
; AVX: vsubsd %xmm0, %xmm1, %xmm0
; CHECK-NOT: movsd
; CHECK: ret


define <2 x double> @test2_mul_sd(<2 x double> %a, <2 x double> %b) {
  %1 = fmul <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

; CHECK-LABEL: test2_mul_sd:
; SSE2: mulsd %xmm0, %xmm1
; AVX: vmulsd %xmm0, %xmm1, %xmm0
; CHECK-NOT: movsd
; CHECK: ret


define <2 x double> @test2_div_sd(<2 x double> %a, <2 x double> %b) {
  %1 = fdiv <2 x double> %b, %a
  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

; CHECK-LABEL: test2_div_sd:
; SSE2: divsd %xmm0, %xmm1
; AVX: vdivsd %xmm0, %xmm1, %xmm0
; CHECK-NOT: movsd
; CHECK: ret


; --- Group 3: op(%a, %b), lane 0 inserted into %a through a vector select. ---
; The constant mask is false only in lane 0, so lane 0 comes from the second
; select operand (the arithmetic result) and every other lane comes from %a.

define <4 x float> @test3_add_ss(<4 x float> %a, <4 x float> %b) {
  %1 = fadd <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

; CHECK-LABEL: test3_add_ss:
; SSE2: addss %xmm1, %xmm0
; AVX: vaddss %xmm1, %xmm0, %xmm0
; CHECK-NOT: movss
; CHECK: ret


define <4 x float> @test3_sub_ss(<4 x float> %a, <4 x float> %b) {
  %1 = fsub <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

; CHECK-LABEL: test3_sub_ss:
; SSE2: subss %xmm1, %xmm0
; AVX: vsubss %xmm1, %xmm0, %xmm0
; CHECK-NOT: movss
; CHECK: ret


define <4 x float> @test3_mul_ss(<4 x float> %a, <4 x float> %b) {
  %1 = fmul <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

; CHECK-LABEL: test3_mul_ss:
; SSE2: mulss %xmm1, %xmm0
; AVX: vmulss %xmm1, %xmm0, %xmm0
; CHECK-NOT: movss
; CHECK: ret


define <4 x float> @test3_div_ss(<4 x float> %a, <4 x float> %b) {
  %1 = fdiv <4 x float> %a, %b
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
  ret <4 x float> %2
}

; CHECK-LABEL: test3_div_ss:
; SSE2: divss %xmm1, %xmm0
; AVX: vdivss %xmm1, %xmm0, %xmm0
; CHECK-NOT: movss
; CHECK: ret


define <2 x double> @test3_add_sd(<2 x double> %a, <2 x double> %b) {
  %1 = fadd <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

; CHECK-LABEL: test3_add_sd:
; SSE2: addsd %xmm1, %xmm0
; AVX: vaddsd %xmm1, %xmm0, %xmm0
; CHECK-NOT: movsd
; CHECK: ret


define <2 x double> @test3_sub_sd(<2 x double> %a, <2 x double> %b) {
  %1 = fsub <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

; CHECK-LABEL: test3_sub_sd:
; SSE2: subsd %xmm1, %xmm0
; AVX: vsubsd %xmm1, %xmm0, %xmm0
; CHECK-NOT: movsd
; CHECK: ret


define <2 x double> @test3_mul_sd(<2 x double> %a, <2 x double> %b) {
  %1 = fmul <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

; CHECK-LABEL: test3_mul_sd:
; SSE2: mulsd %xmm1, %xmm0
; AVX: vmulsd %xmm1, %xmm0, %xmm0
; CHECK-NOT: movsd
; CHECK: ret


define <2 x double> @test3_div_sd(<2 x double> %a, <2 x double> %b) {
  %1 = fdiv <2 x double> %a, %b
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
  ret <2 x double> %2
}

; CHECK-LABEL: test3_div_sd:
; SSE2: divsd %xmm1, %xmm0
; AVX: vdivsd %xmm1, %xmm0, %xmm0
; CHECK-NOT: movsd
; CHECK: ret


; --- Group 4: op(%b, %a), lane 0 inserted into %b through a vector select. ---
; Select-based counterpart of group 2: the upper lanes come from %b.

define <4 x float> @test4_add_ss(<4 x float> %a, <4 x float> %b) {
  %1 = fadd <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

; CHECK-LABEL: test4_add_ss:
; SSE2: addss %xmm0, %xmm1
; AVX: vaddss %xmm0, %xmm1, %xmm0
; CHECK-NOT: movss
; CHECK: ret


define <4 x float> @test4_sub_ss(<4 x float> %a, <4 x float> %b) {
  %1 = fsub <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

; CHECK-LABEL: test4_sub_ss:
; SSE2: subss %xmm0, %xmm1
; AVX: vsubss %xmm0, %xmm1, %xmm0
; CHECK-NOT: movss
; CHECK: ret


define <4 x float> @test4_mul_ss(<4 x float> %a, <4 x float> %b) {
  %1 = fmul <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

; CHECK-LABEL: test4_mul_ss:
; SSE2: mulss %xmm0, %xmm1
; AVX: vmulss %xmm0, %xmm1, %xmm0
; CHECK-NOT: movss
; CHECK: ret


define <4 x float> @test4_div_ss(<4 x float> %a, <4 x float> %b) {
  %1 = fdiv <4 x float> %b, %a
  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
  ret <4 x float> %2
}

; CHECK-LABEL: test4_div_ss:
; SSE2: divss %xmm0, %xmm1
; AVX: vdivss %xmm0, %xmm1, %xmm0
; CHECK-NOT: movss
; CHECK: ret


define <2 x double> @test4_add_sd(<2 x double> %a, <2 x double> %b) {
  %1 = fadd <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}

; CHECK-LABEL: test4_add_sd:
; SSE2: addsd %xmm0, %xmm1
; AVX: vaddsd %xmm0, %xmm1, %xmm0
; CHECK-NOT: movsd
; CHECK: ret


define <2 x double> @test4_sub_sd(<2 x double> %a, <2 x double> %b) {
  %1 = fsub <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}

; CHECK-LABEL: test4_sub_sd:
; SSE2: subsd %xmm0, %xmm1
; AVX: vsubsd %xmm0, %xmm1, %xmm0
; CHECK-NOT: movsd
; CHECK: ret


define <2 x double> @test4_mul_sd(<2 x double> %a, <2 x double> %b) {
  %1 = fmul <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}

; CHECK-LABEL: test4_mul_sd:
; SSE2: mulsd %xmm0, %xmm1
; AVX: vmulsd %xmm0, %xmm1, %xmm0
; CHECK-NOT: movsd
; CHECK: ret


define <2 x double> @test4_div_sd(<2 x double> %a, <2 x double> %b) {
  %1 = fdiv <2 x double> %b, %a
  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
  ret <2 x double> %2
}

; CHECK-LABEL: test4_div_sd:
; SSE2: divsd %xmm0, %xmm1
; AVX: vdivsd %xmm0, %xmm1, %xmm0
; CHECK-NOT: movsd
; CHECK: ret
