Lines Matching refs:RECIP
3 … %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX-RECIP
4 … %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma | FileCheck %s --check-prefixes=AVX,FMA-RECIP
45 ; AVX-RECIP-LABEL: f32_one_step_2:
46 ; AVX-RECIP: # %bb.0:
47 ; AVX-RECIP-NEXT: vrcpss %xmm0, %xmm0, %xmm1
48 ; AVX-RECIP-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
49 ; AVX-RECIP-NEXT: vmulss %xmm2, %xmm1, %xmm3
50 ; AVX-RECIP-NEXT: vmulss %xmm3, %xmm0, %xmm0
51 ; AVX-RECIP-NEXT: vsubss %xmm0, %xmm2, %xmm0
52 ; AVX-RECIP-NEXT: vmulss %xmm0, %xmm1, %xmm0
53 ; AVX-RECIP-NEXT: vaddss %xmm0, %xmm3, %xmm0
54 ; AVX-RECIP-NEXT: retq
56 ; FMA-RECIP-LABEL: f32_one_step_2:
57 ; FMA-RECIP: # %bb.0:
58 ; FMA-RECIP-NEXT: vrcpss %xmm0, %xmm0, %xmm1
59 ; FMA-RECIP-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
60 ; FMA-RECIP-NEXT: vmulss %xmm2, %xmm1, %xmm3
61 ; FMA-RECIP-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm3 * xmm0) - xmm2
62 ; FMA-RECIP-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm3
63 ; FMA-RECIP-NEXT: retq
142 ; AVX-RECIP-LABEL: f32_one_step_2_divs:
143 ; AVX-RECIP: # %bb.0:
144 ; AVX-RECIP-NEXT: vrcpss %xmm0, %xmm0, %xmm1
145 ; AVX-RECIP-NEXT: vmulss %xmm1, %xmm0, %xmm0
146 ; AVX-RECIP-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
147 ; AVX-RECIP-NEXT: vsubss %xmm0, %xmm2, %xmm0
148 ; AVX-RECIP-NEXT: vmulss %xmm0, %xmm1, %xmm0
149 ; AVX-RECIP-NEXT: vaddss %xmm0, %xmm1, %xmm0
150 ; AVX-RECIP-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1
151 ; AVX-RECIP-NEXT: vmulss %xmm0, %xmm1, %xmm0
152 ; AVX-RECIP-NEXT: retq
154 ; FMA-RECIP-LABEL: f32_one_step_2_divs:
155 ; FMA-RECIP: # %bb.0:
156 ; FMA-RECIP-NEXT: vrcpss %xmm0, %xmm0, %xmm1
157 ; FMA-RECIP-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem
158 ; FMA-RECIP-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1
159 ; FMA-RECIP-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1
160 ; FMA-RECIP-NEXT: vmulss %xmm0, %xmm1, %xmm0
161 ; FMA-RECIP-NEXT: retq
250 ; AVX-RECIP-LABEL: f32_two_step_2:
251 ; AVX-RECIP: # %bb.0:
252 ; AVX-RECIP-NEXT: vrcpss %xmm0, %xmm0, %xmm1
253 ; AVX-RECIP-NEXT: vmulss %xmm1, %xmm0, %xmm2
254 ; AVX-RECIP-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
255 ; AVX-RECIP-NEXT: vsubss %xmm2, %xmm3, %xmm2
256 ; AVX-RECIP-NEXT: vmulss %xmm2, %xmm1, %xmm2
257 ; AVX-RECIP-NEXT: vaddss %xmm2, %xmm1, %xmm1
258 ; AVX-RECIP-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
259 ; AVX-RECIP-NEXT: vmulss %xmm2, %xmm1, %xmm3
260 ; AVX-RECIP-NEXT: vmulss %xmm3, %xmm0, %xmm0
261 ; AVX-RECIP-NEXT: vsubss %xmm0, %xmm2, %xmm0
262 ; AVX-RECIP-NEXT: vmulss %xmm0, %xmm1, %xmm0
263 ; AVX-RECIP-NEXT: vaddss %xmm0, %xmm3, %xmm0
264 ; AVX-RECIP-NEXT: retq
266 ; FMA-RECIP-LABEL: f32_two_step_2:
267 ; FMA-RECIP: # %bb.0:
268 ; FMA-RECIP-NEXT: vrcpss %xmm0, %xmm0, %xmm1
269 ; FMA-RECIP-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
270 ; FMA-RECIP-NEXT: vfmsub231ss {{.*#+}} xmm2 = (xmm0 * xmm1) - xmm2
271 ; FMA-RECIP-NEXT: vfnmadd132ss {{.*#+}} xmm2 = -(xmm2 * xmm1) + xmm1
272 ; FMA-RECIP-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
273 ; FMA-RECIP-NEXT: vmulss %xmm1, %xmm2, %xmm3
274 ; FMA-RECIP-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm3 * xmm0) - xmm1
275 ; FMA-RECIP-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm2 * xmm0) + xmm3
276 ; FMA-RECIP-NEXT: retq
378 ; AVX-RECIP-LABEL: v4f32_one_step2:
379 ; AVX-RECIP: # %bb.0:
380 ; AVX-RECIP-NEXT: vrcpps %xmm0, %xmm1
381 ; AVX-RECIP-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0]
382 ; AVX-RECIP-NEXT: vmulps %xmm2, %xmm1, %xmm3
383 ; AVX-RECIP-NEXT: vmulps %xmm3, %xmm0, %xmm0
384 ; AVX-RECIP-NEXT: vsubps %xmm0, %xmm2, %xmm0
385 ; AVX-RECIP-NEXT: vmulps %xmm0, %xmm1, %xmm0
386 ; AVX-RECIP-NEXT: vaddps %xmm0, %xmm3, %xmm0
387 ; AVX-RECIP-NEXT: retq
389 ; FMA-RECIP-LABEL: v4f32_one_step2:
390 ; FMA-RECIP: # %bb.0:
391 ; FMA-RECIP-NEXT: vrcpps %xmm0, %xmm1
392 ; FMA-RECIP-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0]
393 ; FMA-RECIP-NEXT: vmulps %xmm2, %xmm1, %xmm3
394 ; FMA-RECIP-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm3 * xmm0) - xmm2
395 ; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm3
396 ; FMA-RECIP-NEXT: retq
475 ; AVX-RECIP-LABEL: v4f32_one_step_2_divs:
476 ; AVX-RECIP: # %bb.0:
477 ; AVX-RECIP-NEXT: vrcpps %xmm0, %xmm1
478 ; AVX-RECIP-NEXT: vmulps %xmm1, %xmm0, %xmm0
479 ; AVX-RECIP-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
480 ; AVX-RECIP-NEXT: vsubps %xmm0, %xmm2, %xmm0
481 ; AVX-RECIP-NEXT: vmulps %xmm0, %xmm1, %xmm0
482 ; AVX-RECIP-NEXT: vaddps %xmm0, %xmm1, %xmm0
483 ; AVX-RECIP-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1
484 ; AVX-RECIP-NEXT: vmulps %xmm0, %xmm1, %xmm0
485 ; AVX-RECIP-NEXT: retq
487 ; FMA-RECIP-LABEL: v4f32_one_step_2_divs:
488 ; FMA-RECIP: # %bb.0:
489 ; FMA-RECIP-NEXT: vrcpps %xmm0, %xmm1
490 ; FMA-RECIP-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem
491 ; FMA-RECIP-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1
492 ; FMA-RECIP-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1
493 ; FMA-RECIP-NEXT: vmulps %xmm0, %xmm1, %xmm0
494 ; FMA-RECIP-NEXT: retq
594 ; AVX-RECIP-LABEL: v4f32_two_step2:
595 ; AVX-RECIP: # %bb.0:
596 ; AVX-RECIP-NEXT: vrcpps %xmm0, %xmm1
597 ; AVX-RECIP-NEXT: vmulps %xmm1, %xmm0, %xmm2
598 ; AVX-RECIP-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
599 ; AVX-RECIP-NEXT: vsubps %xmm2, %xmm3, %xmm2
600 ; AVX-RECIP-NEXT: vmulps %xmm2, %xmm1, %xmm2
601 ; AVX-RECIP-NEXT: vaddps %xmm2, %xmm1, %xmm1
602 ; AVX-RECIP-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0]
603 ; AVX-RECIP-NEXT: vmulps %xmm2, %xmm1, %xmm3
604 ; AVX-RECIP-NEXT: vmulps %xmm3, %xmm0, %xmm0
605 ; AVX-RECIP-NEXT: vsubps %xmm0, %xmm2, %xmm0
606 ; AVX-RECIP-NEXT: vmulps %xmm0, %xmm1, %xmm0
607 ; AVX-RECIP-NEXT: vaddps %xmm0, %xmm3, %xmm0
608 ; AVX-RECIP-NEXT: retq
610 ; FMA-RECIP-LABEL: v4f32_two_step2:
611 ; FMA-RECIP: # %bb.0:
612 ; FMA-RECIP-NEXT: vrcpps %xmm0, %xmm1
613 ; FMA-RECIP-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
614 ; FMA-RECIP-NEXT: vfmsub231ps {{.*#+}} xmm2 = (xmm0 * xmm1) - xmm2
615 ; FMA-RECIP-NEXT: vfnmadd132ps {{.*#+}} xmm2 = -(xmm2 * xmm1) + xmm1
616 ; FMA-RECIP-NEXT: vmovaps {{.*#+}} xmm1 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0]
617 ; FMA-RECIP-NEXT: vmulps %xmm1, %xmm2, %xmm3
618 ; FMA-RECIP-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm3 * xmm0) - xmm1
619 ; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm2 * xmm0) + xmm3
620 ; FMA-RECIP-NEXT: retq
731 ; AVX-RECIP-LABEL: v8f32_one_step2:
732 ; AVX-RECIP: # %bb.0:
733 ; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm1
734 ; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0,8.0E…
735 ; AVX-RECIP-NEXT: vmulps %ymm2, %ymm1, %ymm3
736 ; AVX-RECIP-NEXT: vmulps %ymm3, %ymm0, %ymm0
737 ; AVX-RECIP-NEXT: vsubps %ymm0, %ymm2, %ymm0
738 ; AVX-RECIP-NEXT: vmulps %ymm0, %ymm1, %ymm0
739 ; AVX-RECIP-NEXT: vaddps %ymm0, %ymm3, %ymm0
740 ; AVX-RECIP-NEXT: retq
742 ; FMA-RECIP-LABEL: v8f32_one_step2:
743 ; FMA-RECIP: # %bb.0:
744 ; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm1
745 ; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0,8.0E…
746 ; FMA-RECIP-NEXT: vmulps %ymm2, %ymm1, %ymm3
747 ; FMA-RECIP-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm3 * ymm0) - ymm2
748 ; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm3
749 ; FMA-RECIP-NEXT: retq
837 ; AVX-RECIP-LABEL: v8f32_one_step_2_divs:
838 ; AVX-RECIP: # %bb.0:
839 ; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm1
840 ; AVX-RECIP-NEXT: vmulps %ymm1, %ymm0, %ymm0
841 ; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E…
842 ; AVX-RECIP-NEXT: vsubps %ymm0, %ymm2, %ymm0
843 ; AVX-RECIP-NEXT: vmulps %ymm0, %ymm1, %ymm0
844 ; AVX-RECIP-NEXT: vaddps %ymm0, %ymm1, %ymm0
845 ; AVX-RECIP-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1
846 ; AVX-RECIP-NEXT: vmulps %ymm0, %ymm1, %ymm0
847 ; AVX-RECIP-NEXT: retq
849 ; FMA-RECIP-LABEL: v8f32_one_step_2_divs:
850 ; FMA-RECIP: # %bb.0:
851 ; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm1
852 ; FMA-RECIP-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem
853 ; FMA-RECIP-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm1) + ymm1
854 ; FMA-RECIP-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1
855 ; FMA-RECIP-NEXT: vmulps %ymm0, %ymm1, %ymm0
856 ; FMA-RECIP-NEXT: retq
971 ; AVX-RECIP-LABEL: v8f32_two_step2:
972 ; AVX-RECIP: # %bb.0:
973 ; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm1
974 ; AVX-RECIP-NEXT: vmulps %ymm1, %ymm0, %ymm2
975 ; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E…
976 ; AVX-RECIP-NEXT: vsubps %ymm2, %ymm3, %ymm2
977 ; AVX-RECIP-NEXT: vmulps %ymm2, %ymm1, %ymm2
978 ; AVX-RECIP-NEXT: vaddps %ymm2, %ymm1, %ymm1
979 ; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0,8.0E…
980 ; AVX-RECIP-NEXT: vmulps %ymm2, %ymm1, %ymm3
981 ; AVX-RECIP-NEXT: vmulps %ymm3, %ymm0, %ymm0
982 ; AVX-RECIP-NEXT: vsubps %ymm0, %ymm2, %ymm0
983 ; AVX-RECIP-NEXT: vmulps %ymm0, %ymm1, %ymm0
984 ; AVX-RECIP-NEXT: vaddps %ymm0, %ymm3, %ymm0
985 ; AVX-RECIP-NEXT: retq
987 ; FMA-RECIP-LABEL: v8f32_two_step2:
988 ; FMA-RECIP: # %bb.0:
989 ; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm1
990 ; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E…
991 ; FMA-RECIP-NEXT: vfmsub231ps {{.*#+}} ymm2 = (ymm0 * ymm1) - ymm2
992 ; FMA-RECIP-NEXT: vfnmadd132ps {{.*#+}} ymm2 = -(ymm2 * ymm1) + ymm1
993 ; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm1 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0,8.0E…
994 ; FMA-RECIP-NEXT: vmulps %ymm1, %ymm2, %ymm3
995 ; FMA-RECIP-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm3 * ymm0) - ymm1
996 ; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm3
997 ; FMA-RECIP-NEXT: retq
1159 ; AVX-RECIP-LABEL: v16f32_one_step2:
1160 ; AVX-RECIP: # %bb.0:
1161 ; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm2
1162 ; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0,8.0E…
1163 ; AVX-RECIP-NEXT: vmulps %ymm3, %ymm2, %ymm4
1164 ; AVX-RECIP-NEXT: vmulps %ymm4, %ymm0, %ymm0
1165 ; AVX-RECIP-NEXT: vsubps %ymm0, %ymm3, %ymm0
1166 ; AVX-RECIP-NEXT: vmulps %ymm0, %ymm2, %ymm0
1167 ; AVX-RECIP-NEXT: vaddps %ymm0, %ymm4, %ymm0
1168 ; AVX-RECIP-NEXT: vrcpps %ymm1, %ymm2
1169 ; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [9.0E+0,1.0E+1,1.1E+1,1.2E+1,1.3E+1,1.4E+1,1.5E+1,1.6E…
1170 ; AVX-RECIP-NEXT: vmulps %ymm3, %ymm2, %ymm4
1171 ; AVX-RECIP-NEXT: vmulps %ymm4, %ymm1, %ymm1
1172 ; AVX-RECIP-NEXT: vsubps %ymm1, %ymm3, %ymm1
1173 ; AVX-RECIP-NEXT: vmulps %ymm1, %ymm2, %ymm1
1174 ; AVX-RECIP-NEXT: vaddps %ymm1, %ymm4, %ymm1
1175 ; AVX-RECIP-NEXT: retq
1177 ; FMA-RECIP-LABEL: v16f32_one_step2:
1178 ; FMA-RECIP: # %bb.0:
1179 ; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm2
1180 ; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0,8.0E…
1181 ; FMA-RECIP-NEXT: vmulps %ymm3, %ymm2, %ymm4
1182 ; FMA-RECIP-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm4 * ymm0) - ymm3
1183 ; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm4
1184 ; FMA-RECIP-NEXT: vrcpps %ymm1, %ymm2
1185 ; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [9.0E+0,1.0E+1,1.1E+1,1.2E+1,1.3E+1,1.4E+1,1.5E+1,1.6E…
1186 ; FMA-RECIP-NEXT: vmulps %ymm3, %ymm2, %ymm4
1187 ; FMA-RECIP-NEXT: vfmsub213ps {{.*#+}} ymm1 = (ymm4 * ymm1) - ymm3
1188 ; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm2 * ymm1) + ymm4
1189 ; FMA-RECIP-NEXT: retq
1326 ; AVX-RECIP-LABEL: v16f32_one_step_2_divs:
1327 ; AVX-RECIP: # %bb.0:
1328 ; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm2
1329 ; AVX-RECIP-NEXT: vmulps %ymm2, %ymm0, %ymm0
1330 ; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E…
1331 ; AVX-RECIP-NEXT: vsubps %ymm0, %ymm3, %ymm0
1332 ; AVX-RECIP-NEXT: vmulps %ymm0, %ymm2, %ymm0
1333 ; AVX-RECIP-NEXT: vaddps %ymm0, %ymm2, %ymm0
1334 ; AVX-RECIP-NEXT: vrcpps %ymm1, %ymm2
1335 ; AVX-RECIP-NEXT: vmulps %ymm2, %ymm1, %ymm1
1336 ; AVX-RECIP-NEXT: vsubps %ymm1, %ymm3, %ymm1
1337 ; AVX-RECIP-NEXT: vmulps %ymm1, %ymm2, %ymm1
1338 ; AVX-RECIP-NEXT: vaddps %ymm1, %ymm2, %ymm1
1339 ; AVX-RECIP-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm2
1340 ; AVX-RECIP-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm3
1341 ; AVX-RECIP-NEXT: vmulps %ymm0, %ymm3, %ymm0
1342 ; AVX-RECIP-NEXT: vmulps %ymm1, %ymm2, %ymm1
1343 ; AVX-RECIP-NEXT: retq
1345 ; FMA-RECIP-LABEL: v16f32_one_step_2_divs:
1346 ; FMA-RECIP: # %bb.0:
1347 ; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm2
1348 ; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E…
1349 ; FMA-RECIP-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm3
1350 ; FMA-RECIP-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm2
1351 ; FMA-RECIP-NEXT: vrcpps %ymm1, %ymm2
1352 ; FMA-RECIP-NEXT: vfmsub213ps {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm3
1353 ; FMA-RECIP-NEXT: vfnmadd132ps {{.*#+}} ymm1 = -(ymm1 * ymm2) + ymm2
1354 ; FMA-RECIP-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm2
1355 ; FMA-RECIP-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm3
1356 ; FMA-RECIP-NEXT: vmulps %ymm0, %ymm3, %ymm0
1357 ; FMA-RECIP-NEXT: vmulps %ymm1, %ymm2, %ymm1
1358 ; FMA-RECIP-NEXT: retq
1525 ; AVX-RECIP-LABEL: v16f32_two_step2:
1526 ; AVX-RECIP: # %bb.0:
1527 ; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm2
1528 ; AVX-RECIP-NEXT: vmulps %ymm2, %ymm0, %ymm3
1529 ; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E…
1530 ; AVX-RECIP-NEXT: vsubps %ymm3, %ymm4, %ymm3
1531 ; AVX-RECIP-NEXT: vmulps %ymm3, %ymm2, %ymm3
1532 ; AVX-RECIP-NEXT: vaddps %ymm3, %ymm2, %ymm2
1533 ; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0,8.0E…
1534 ; AVX-RECIP-NEXT: vmulps %ymm3, %ymm2, %ymm5
1535 ; AVX-RECIP-NEXT: vmulps %ymm5, %ymm0, %ymm0
1536 ; AVX-RECIP-NEXT: vsubps %ymm0, %ymm3, %ymm0
1537 ; AVX-RECIP-NEXT: vmulps %ymm0, %ymm2, %ymm0
1538 ; AVX-RECIP-NEXT: vaddps %ymm0, %ymm5, %ymm0
1539 ; AVX-RECIP-NEXT: vrcpps %ymm1, %ymm2
1540 ; AVX-RECIP-NEXT: vmulps %ymm2, %ymm1, %ymm3
1541 ; AVX-RECIP-NEXT: vsubps %ymm3, %ymm4, %ymm3
1542 ; AVX-RECIP-NEXT: vmulps %ymm3, %ymm2, %ymm3
1543 ; AVX-RECIP-NEXT: vaddps %ymm3, %ymm2, %ymm2
1544 ; AVX-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [9.0E+0,1.0E+1,1.1E+1,1.2E+1,1.3E+1,1.4E+1,1.5E+1,1.6E…
1545 ; AVX-RECIP-NEXT: vmulps %ymm3, %ymm2, %ymm4
1546 ; AVX-RECIP-NEXT: vmulps %ymm4, %ymm1, %ymm1
1547 ; AVX-RECIP-NEXT: vsubps %ymm1, %ymm3, %ymm1
1548 ; AVX-RECIP-NEXT: vmulps %ymm1, %ymm2, %ymm1
1549 ; AVX-RECIP-NEXT: vaddps %ymm1, %ymm4, %ymm1
1550 ; AVX-RECIP-NEXT: retq
1552 ; FMA-RECIP-LABEL: v16f32_two_step2:
1553 ; FMA-RECIP: # %bb.0:
1554 ; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm2
1555 ; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E…
1556 ; FMA-RECIP-NEXT: vmovaps %ymm2, %ymm4
1557 ; FMA-RECIP-NEXT: vfmsub213ps {{.*#+}} ymm4 = (ymm0 * ymm4) - ymm3
1558 ; FMA-RECIP-NEXT: vfnmadd132ps {{.*#+}} ymm4 = -(ymm4 * ymm2) + ymm2
1559 ; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0,8.0E…
1560 ; FMA-RECIP-NEXT: vmulps %ymm2, %ymm4, %ymm5
1561 ; FMA-RECIP-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm5 * ymm0) - ymm2
1562 ; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm4 * ymm0) + ymm5
1563 ; FMA-RECIP-NEXT: vrcpps %ymm1, %ymm2
1564 ; FMA-RECIP-NEXT: vfmsub231ps {{.*#+}} ymm3 = (ymm1 * ymm2) - ymm3
1565 ; FMA-RECIP-NEXT: vfnmadd132ps {{.*#+}} ymm3 = -(ymm3 * ymm2) + ymm2
1566 ; FMA-RECIP-NEXT: vmovaps {{.*#+}} ymm2 = [9.0E+0,1.0E+1,1.1E+1,1.2E+1,1.3E+1,1.4E+1,1.5E+1,1.6E…
1567 ; FMA-RECIP-NEXT: vmulps %ymm2, %ymm3, %ymm4
1568 ; FMA-RECIP-NEXT: vfmsub213ps {{.*#+}} ymm1 = (ymm4 * ymm1) - ymm2
1569 ; FMA-RECIP-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm3 * ymm1) + ymm4
1570 ; FMA-RECIP-NEXT: retq
1716 ; AVX-RECIP-LABEL: v16f32_no_step:
1717 ; AVX-RECIP: # %bb.0:
1718 ; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm0
1719 ; AVX-RECIP-NEXT: vrcpps %ymm1, %ymm1
1720 ; AVX-RECIP-NEXT: retq
1722 ; FMA-RECIP-LABEL: v16f32_no_step:
1723 ; FMA-RECIP: # %bb.0:
1724 ; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm0
1725 ; FMA-RECIP-NEXT: vrcpps %ymm1, %ymm1
1726 ; FMA-RECIP-NEXT: retq
1779 ; AVX-RECIP-LABEL: v16f32_no_step2:
1780 ; AVX-RECIP: # %bb.0:
1781 ; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm0
1782 ; AVX-RECIP-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
1783 ; AVX-RECIP-NEXT: vrcpps %ymm1, %ymm1
1784 ; AVX-RECIP-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
1785 ; AVX-RECIP-NEXT: retq
1787 ; FMA-RECIP-LABEL: v16f32_no_step2:
1788 ; FMA-RECIP: # %bb.0:
1789 ; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm0
1790 ; FMA-RECIP-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
1791 ; FMA-RECIP-NEXT: vrcpps %ymm1, %ymm1
1792 ; FMA-RECIP-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
1793 ; FMA-RECIP-NEXT: retq