; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+avx2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck %s --check-prefix=X64

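; Check that 256-bit vector stores carrying !nontemporal metadata are lowered
; to the streaming store instructions (vmovntps/vmovntpd/vmovntdq) when AVX2
; is available, for float, double, and all integer element widths.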
define void @f(<8 x float> %A, i8* %B, <4 x double> %C, <4 x i64> %E, <8 x i32> %F, <16 x i16> %G, <32 x i8> %H) nounwind {
; X32-LABEL: f:
; X32:       # BB#0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    andl $-32, %esp
; X32-NEXT:    subl $32, %esp
; X32-NEXT:    vmovdqa 104(%ebp), %ymm3
; X32-NEXT:    vmovdqa 72(%ebp), %ymm4
; X32-NEXT:    vmovdqa 40(%ebp), %ymm5
; X32-NEXT:    movl 8(%ebp), %eax
; X32-NEXT:    vaddps .LCPI0_0, %ymm0, %ymm0
; X32-NEXT:    vmovntps %ymm0, (%eax)
; X32-NEXT:    vpaddq .LCPI0_1, %ymm2, %ymm0
; X32-NEXT:    vmovntdq %ymm0, (%eax)
; X32-NEXT:    vaddpd .LCPI0_2, %ymm1, %ymm0
; X32-NEXT:    vmovntpd %ymm0, (%eax)
; X32-NEXT:    vpaddd .LCPI0_3, %ymm5, %ymm0
; X32-NEXT:    vmovntdq %ymm0, (%eax)
; X32-NEXT:    vpaddw .LCPI0_4, %ymm4, %ymm0
; X32-NEXT:    vmovntdq %ymm0, (%eax)
; X32-NEXT:    vpaddb .LCPI0_5, %ymm3, %ymm0
; X32-NEXT:    vmovntdq %ymm0, (%eax)
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: f:
; X64:       # BB#0:
; X64-NEXT:    vaddps {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    vmovntps %ymm0, (%rdi)
; X64-NEXT:    vpaddq {{.*}}(%rip), %ymm2, %ymm0
; X64-NEXT:    vmovntdq %ymm0, (%rdi)
; X64-NEXT:    vaddpd {{.*}}(%rip), %ymm1, %ymm0
; X64-NEXT:    vmovntpd %ymm0, (%rdi)
; X64-NEXT:    vpaddd {{.*}}(%rip), %ymm3, %ymm0
; X64-NEXT:    vmovntdq %ymm0, (%rdi)
; X64-NEXT:    vpaddw {{.*}}(%rip), %ymm4, %ymm0
; X64-NEXT:    vmovntdq %ymm0, (%rdi)
; X64-NEXT:    vpaddb {{.*}}(%rip), %ymm5, %ymm0
; X64-NEXT:    vmovntdq %ymm0, (%rdi)
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %cast = bitcast i8* %B to <8 x float>*
  %A2 = fadd <8 x float> %A, <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>
  store <8 x float> %A2, <8 x float>* %cast, align 32, !nontemporal !0
  %cast1 = bitcast i8* %B to <4 x i64>*
  %E2 = add <4 x i64> %E, <i64 1, i64 2, i64 3, i64 4>
  store <4 x i64> %E2, <4 x i64>* %cast1, align 32, !nontemporal !0
  %cast2 = bitcast i8* %B to <4 x double>*
  %C2 = fadd <4 x double> %C, <double 1.0, double 2.0, double 3.0, double 4.0>
  store <4 x double> %C2, <4 x double>* %cast2, align 32, !nontemporal !0
  %cast3 = bitcast i8* %B to <8 x i32>*
  %F2 = add <8 x i32> %F, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
  store <8 x i32> %F2, <8 x i32>* %cast3, align 32, !nontemporal !0
  %cast4 = bitcast i8* %B to <16 x i16>*
  %G2 = add <16 x i16> %G, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>
  store <16 x i16> %G2, <16 x i16>* %cast4, align 32, !nontemporal !0
  %cast5 = bitcast i8* %B to <32 x i8>*
  %H2 = add <32 x i8> %H, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>
  store <32 x i8> %H2, <32 x i8>* %cast5, align 32, !nontemporal !0
  ret void
}

!0 = !{i32 1}