; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f -mattr=+avx512vl| FileCheck %s

declare void @func_f32(float)
; A scalar float kept live across a call is spilled with vmovss; the reload
; is folded directly into the 256-bit splat (vbroadcastss from the stack
; slot) instead of a separate scalar reload followed by a register broadcast.
define <8 x float> @_256_broadcast_ss_spill(float %x) {
; CHECK-LABEL: _256_broadcast_ss_spill:
; CHECK:       # BB#0:
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:  .Ltmp0:
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    vaddss %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    vmovss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Folded Spill
; CHECK-NEXT:    callq func_f32
; CHECK-NEXT:    vbroadcastss {{[0-9]+}}(%rsp), %ymm0 # 4-byte Folded Reload
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    retq
  %a  = fadd float %x, %x                 ; %a must survive the call below, forcing a spill
  call void @func_f32(float %a)
  %b = insertelement <8 x float> undef, float %a, i32 0
  %c = shufflevector <8 x float> %b, <8 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %c                      ; 8-lane splat of %a
}

; Same spill/reload pattern as the 256-bit case, but the folded broadcast
; reload targets a 128-bit destination (vbroadcastss ... %xmm0).
define <4 x float> @_128_broadcast_ss_spill(float %x) {
; CHECK-LABEL: _128_broadcast_ss_spill:
; CHECK:       # BB#0:
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:  .Ltmp1:
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    vaddss %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    vmovss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Folded Spill
; CHECK-NEXT:    callq func_f32
; CHECK-NEXT:    vbroadcastss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Folded Reload
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    retq
  %a  = fadd float %x, %x                 ; %a must survive the call below, forcing a spill
  call void @func_f32(float %a)
  %b = insertelement <4 x float> undef, float %a, i32 0
  %c = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %c                      ; 4-lane splat of %a
}

declare void @func_f64(double)
; Double-precision variant: 8-byte spill via vmovsd, and the reload is
; folded into vbroadcastsd from the stack slot into ymm0.
define <4 x double> @_256_broadcast_sd_spill(double %x) {
; CHECK-LABEL: _256_broadcast_sd_spill:
; CHECK:       # BB#0:
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:  .Ltmp2:
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    vaddsd %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    vmovsd %xmm0, (%rsp) # 8-byte Folded Spill
; CHECK-NEXT:    callq func_f64
; CHECK-NEXT:    vbroadcastsd (%rsp), %ymm0 # 8-byte Folded Reload
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    retq
  %a  = fadd double %x, %x                ; %a must survive the call below, forcing a spill
  call void @func_f64(double %a)
  %b = insertelement <4 x double> undef, double %a, i32 0
  %c = shufflevector <4 x double> %b, <4 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %c                     ; 4-lane splat of %a
}

; Splat of an in-register scalar float to 8 lanes: selects the AVX-512VL
; register-to-register form vbroadcastss %xmm0, %ymm0.
define   <8 x float> @_inreg8xfloat(float %a) {
; CHECK-LABEL: _inreg8xfloat:
; CHECK:       # BB#0:
; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0
; CHECK-NEXT:    retq
  %b = insertelement <8 x float> undef, float %a, i32 0
  %c = shufflevector <8 x float> %b, <8 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %c
}

; Merge-masked splat: the icmp-ne-zero select folds into a k-register
; compare (vpcmpneqd -> %k1) feeding a masked vbroadcastss {%k1}, which
; keeps the lanes of %i where the mask is false.
define   <8 x float> @_ss8xfloat_mask(<8 x float> %i, float %a, <8 x i32> %mask1) {
; CHECK-LABEL: _ss8xfloat_mask:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpxord %ymm3, %ymm3, %ymm3
; CHECK-NEXT:    vpcmpneqd %ymm3, %ymm2, %k1
; CHECK-NEXT:    vbroadcastss %xmm1, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %b = insertelement <8 x float> undef, float %a, i32 0
  %c = shufflevector <8 x float> %b, <8 x float> undef, <8 x i32> zeroinitializer
  %r = select <8 x i1> %mask, <8 x float> %c, <8 x float> %i
  ret <8 x float> %r
}

; Zero-masked splat: selecting against zeroinitializer folds into the
; zeroing-masked broadcast form vbroadcastss ... {%k1} {z}.
define   <8 x float> @_ss8xfloat_maskz(float %a, <8 x i32> %mask1) {
; CHECK-LABEL: _ss8xfloat_maskz:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2
; CHECK-NEXT:    vpcmpneqd %ymm2, %ymm1, %k1
; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %b = insertelement <8 x float> undef, float %a, i32 0
  %c = shufflevector <8 x float> %b, <8 x float> undef, <8 x i32> zeroinitializer
  %r = select <8 x i1> %mask, <8 x float> %c, <8 x float> zeroinitializer
  ret <8 x float> %r
}

; 128-bit splat of an in-register scalar float: vbroadcastss %xmm0, %xmm0.
define   <4 x float> @_inreg4xfloat(float %a) {
; CHECK-LABEL: _inreg4xfloat:
; CHECK:       # BB#0:
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0
; CHECK-NEXT:    retq
  %b = insertelement <4 x float> undef, float %a, i32 0
  %c = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %c
}

; 128-bit merge-masked splat: vpcmpneqd builds %k1, and vbroadcastss {%k1}
; merges with the passthrough vector %i in xmm0.
define   <4 x float> @_ss4xfloat_mask(<4 x float> %i, float %a, <4 x i32> %mask1) {
; CHECK-LABEL: _ss4xfloat_mask:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpxord %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vpcmpneqd %xmm3, %xmm2, %k1
; CHECK-NEXT:    vbroadcastss %xmm1, %xmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %b = insertelement <4 x float> undef, float %a, i32 0
  %c = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer
  %r = select <4 x i1> %mask, <4 x float> %c, <4 x float> %i
  ret <4 x float> %r
}

; 128-bit zero-masked splat: vbroadcastss ... {%k1} {z} zeroes the lanes
; where the mask is false.
define   <4 x float> @_ss4xfloat_maskz(float %a, <4 x i32> %mask1) {
; CHECK-LABEL: _ss4xfloat_maskz:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vpcmpneqd %xmm2, %xmm1, %k1
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %b = insertelement <4 x float> undef, float %a, i32 0
  %c = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer
  %r = select <4 x i1> %mask, <4 x float> %c, <4 x float> zeroinitializer
  ret <4 x float> %r
}

; Splat of an in-register scalar double to 4 lanes: vbroadcastsd to ymm0.
define   <4 x double> @_inreg4xdouble(double %a) {
; CHECK-LABEL: _inreg4xdouble:
; CHECK:       # BB#0:
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
; CHECK-NEXT:    retq
  %b = insertelement <4 x double> undef, double %a, i32 0
  %c = shufflevector <4 x double> %b, <4 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %c
}

; Merge-masked double splat: the <4 x i32> mask compare (xmm) produces %k1
; for a masked vbroadcastsd {%k1} that preserves %i's lanes in ymm0.
define   <4 x double> @_ss4xdouble_mask(<4 x double> %i, double %a, <4 x i32> %mask1) {
; CHECK-LABEL: _ss4xdouble_mask:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpxord %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vpcmpneqd %xmm3, %xmm2, %k1
; CHECK-NEXT:    vbroadcastsd %xmm1, %ymm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %b = insertelement <4 x double> undef, double %a, i32 0
  %c = shufflevector <4 x double> %b, <4 x double> undef, <4 x i32> zeroinitializer
  %r = select <4 x i1> %mask, <4 x double> %c, <4 x double> %i
  ret <4 x double> %r
}

; Zero-masked double splat: vbroadcastsd ... {%k1} {z} zeroes the lanes
; where the mask is false.
define   <4 x double> @_ss4xdouble_maskz(double %a, <4 x i32> %mask1) {
; CHECK-LABEL: _ss4xdouble_maskz:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vpcmpneqd %xmm2, %xmm1, %k1
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %b = insertelement <4 x double> undef, double %a, i32 0
  %c = shufflevector <4 x double> %b, <4 x double> undef, <4 x i32> zeroinitializer
  %r = select <4 x i1> %mask, <4 x double> %c, <4 x double> zeroinitializer
  ret <4 x double> %r
}
