; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s

declare <4 x float> @foo(<4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>)

; Due to a bug in X86RegisterInfo::getLargestLegalSuperClass this test case
; was trying to use XMM16 and spill it without VLX support for the necessary
; store instruction. We briefly implemented the spill using VEXTRACTF32X4,
; but the bug in getLargestLegalSuperClass has now been fixed so we no longer
; use XMM16.

define <4 x float> @bar(<4 x float>* %a1p, <4 x float>* %a2p, <4 x float> %a3, <4 x float> %a4, <16 x float>%c1, <16 x float>%c2) {
; CHECK-LABEL: bar:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $72, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 80
; CHECK-NEXT:    vmovaps %xmm1, %xmm9
; CHECK-NEXT:    vbroadcasti32x4 {{.*#+}} zmm14 = [4,22,1,17,4,22,1,17,4,22,1,17,4,22,1,17]
; CHECK-NEXT:    # zmm14 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; CHECK-NEXT:    vpermi2ps %zmm3, %zmm2, %zmm14
; CHECK-NEXT:    vbroadcasti32x4 {{.*#+}} zmm10 = [4,30,1,22,4,30,1,22,4,30,1,22,4,30,1,22]
; CHECK-NEXT:    # zmm10 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; CHECK-NEXT:    vpermi2ps %zmm3, %zmm2, %zmm10
; CHECK-NEXT:    vbroadcastsd {{.*#+}} zmm7 = [85899345925,85899345925,85899345925,85899345925,85899345925,85899345925,85899345925,85899345925]
; CHECK-NEXT:    vpermi2ps %zmm3, %zmm2, %zmm7
; CHECK-NEXT:    vmovaps {{.*#+}} xmm8 = [4,28,1,29]
; CHECK-NEXT:    vpermi2ps %zmm3, %zmm2, %zmm8
; CHECK-NEXT:    vmovaps {{.*#+}} xmm4 = [4,21,1,7]
; CHECK-NEXT:    vpermi2ps %zmm3, %zmm2, %zmm4
; CHECK-NEXT:    vextractf128 $1, %ymm3, %xmm5
; CHECK-NEXT:    vextractf128 $1, %ymm2, %xmm6
; CHECK-NEXT:    vunpcklps {{.*#+}} xmm11 = xmm6[0],xmm5[0],xmm6[1],xmm5[1]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm11[0,1],xmm2[1],xmm11[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm13 = xmm1[0,1,2],xmm3[1]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm6 = xmm4[0,1,2],xmm3[1]
; CHECK-NEXT:    vmovaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vextractf32x4 $2, %zmm3, %xmm4
; CHECK-NEXT:    vblendps {{.*#+}} xmm4 = xmm1[0,1,2],xmm4[3]
; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm2[3,3,3,3]
; CHECK-NEXT:    vunpcklps {{.*#+}} xmm5 = xmm0[0],xmm5[0],xmm0[1],xmm5[1]
; CHECK-NEXT:    vshufps {{.*#+}} xmm5 = xmm5[0,1],xmm2[1,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm5 = xmm5[0,1,2],xmm3[1]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm7[0,1],xmm2[1],xmm7[3]
; CHECK-NEXT:    vblendps {{.*#+}} xmm7 = xmm0[0,1,2],xmm3[3]
; CHECK-NEXT:    vblendps {{.*#+}} xmm12 = xmm1[0,1,2],xmm3[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm8[0,1,2],xmm3[1]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[1]
; CHECK-NEXT:    vaddps %xmm1, %xmm0, %xmm8
; CHECK-NEXT:    vshufps {{.*#+}} xmm2 = xmm11[0,1],xmm2[3,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[2]
; CHECK-NEXT:    vaddps %xmm2, %xmm14, %xmm2
; CHECK-NEXT:    vmovaps %xmm13, %xmm1
; CHECK-NEXT:    vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vaddps %xmm10, %xmm13, %xmm10
; CHECK-NEXT:    vaddps %xmm13, %xmm13, %xmm3
; CHECK-NEXT:    vaddps %xmm12, %xmm14, %xmm0
; CHECK-NEXT:    vaddps %xmm0, %xmm8, %xmm0
; CHECK-NEXT:    vaddps %xmm0, %xmm13, %xmm0
; CHECK-NEXT:    vmovaps %xmm3, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    vmovaps %xmm10, (%rsp)
; CHECK-NEXT:    vmovaps %xmm9, %xmm3
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq foo
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vaddps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    addq $72, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %a1 = shufflevector <16 x float>%c1, <16 x float>%c2, <4 x i32> <i32 4, i32 20, i32 1, i32 17>
  %a2 = shufflevector <16 x float>%c1, <16 x float>%c2, <4 x i32> <i32 4, i32 21, i32 1, i32 17>
  %a5 = shufflevector <16 x float>%c1, <16 x float>%c2, <4 x i32> <i32 4, i32 20, i32 1, i32 27>
  %a6 = shufflevector <16 x float>%c1, <16 x float>%c2, <4 x i32> <i32 3, i32 20, i32 1, i32 17>
  %a7 = shufflevector <16 x float>%c1, <16 x float>%c2, <4 x i32> <i32 4, i32 21, i32 1, i32 17>
  %a8 = shufflevector <16 x float>%c1, <16 x float>%c2, <4 x i32> <i32 5, i32 20, i32 1, i32 19>
  %a9 = shufflevector <16 x float>%c1, <16 x float>%c2, <4 x i32> <i32 4, i32 20, i32 1, i32 17>
  %a10 = shufflevector <16 x float>%c1, <16 x float>%c2, <4 x i32> <i32 4, i32 20, i32 1, i32 17>
  %ax2 = shufflevector <16 x float>%c1, <16 x float>%c2, <4 x i32> <i32 4, i32 20, i32 1, i32 19>
  %ax5 = shufflevector <16 x float>%c1, <16 x float>%c2, <4 x i32> <i32 4, i32 20, i32 1, i32 17>
  %ax6 = shufflevector <16 x float>%c1, <16 x float>%c2, <4 x i32> <i32 4, i32 22, i32 1, i32 18>
  %ax7 = shufflevector <16 x float>%c1, <16 x float>%c2, <4 x i32> <i32 1, i32 20, i32 1, i32 17>
  %ax8 = shufflevector <16 x float>%c1, <16 x float>%c2, <4 x i32> <i32 4, i32 20, i32 1, i32 19>
  %ax9 = shufflevector <16 x float>%c1, <16 x float>%c2, <4 x i32> <i32 4, i32 20, i32 1, i32 17>
  %ax10 = shufflevector <16 x float>%c1, <16 x float>%c2, <4 x i32> <i32 4, i32 20, i32 1, i32 17>
  %ay2 = shufflevector <16 x float>%c1, <16 x float>%c2, <4 x i32> <i32 4, i32 20, i32 1, i32 17>
  %ay5 = shufflevector <16 x float>%c1, <16 x float>%c2, <4 x i32> <i32 4, i32 28, i32 1, i32 17>
  %ay6 = shufflevector <16 x float>%c1, <16 x float>%c2, <4 x i32> <i32 5, i32 20, i32 1, i32 17>
  %ay7 = shufflevector <16 x float>%c1, <16 x float>%c2, <4 x i32> <i32 4, i32 30, i32 1, i32 22>
  %ay8 = shufflevector <16 x float>%c1, <16 x float>%c2, <4 x i32> <i32 4, i32 20, i32 1, i32 17>
  %ay9 = shufflevector <16 x float>%c1, <16 x float>%c2, <4 x i32> <i32 4, i32 22, i32 1, i32 17>
  %ay10 = shufflevector <16 x float>%c1, <16 x float>%c2, <4 x i32> <i32 4, i32 20, i32 3, i32 18>

  %r1 = fadd <4 x float> %ay10, %ay9
  %r2 = fadd <4 x float> %ay8, %ay7
  %r3 = fadd <4 x float> %ay6, %ay5
  %r4 = fadd <4 x float> %ay2, %ax10
  %r5 = fadd <4 x float> %ay9, %ax8
  %r6 = fadd <4 x float> %r5, %r3
  %r7 = fadd <4 x float> %a9, %r6
  %a11 = call <4 x float> @foo(<4 x float> %r7, <4 x float> %a10, <4 x float> %r1, <4 x float> %a4, <4 x float> %a5, <4 x float> %a6, <4 x float> %a7, <4 x float> %a8, <4 x float> %r2, <4 x float> %r4)
  %a12 = fadd <4 x float> %a2, %a1
  %a13 = fadd <4 x float> %a12, %a11

  ret <4 x float> %a13
}