; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -asm-verbose=0 -mtriple=aarch64-none-eabi -mattr=+bf16 | FileCheck %s

; C source: bfloat16x4_t test_vcreate_bf16(uint64_t a) { return vcreate_bf16(a); }
; The i64 argument is bit-reinterpreted as four bfloat lanes; the expected
; output is a single move from x0 into d0.
define <4 x bfloat> @test_vcreate_bf16(i64 %a) nounwind {
; CHECK-LABEL: test_vcreate_bf16:
; CHECK-NEXT:    fmov d0, x0
; CHECK-NEXT:    ret
entry:
  %as_vec = bitcast i64 %a to <4 x bfloat>
  ret <4 x bfloat> %as_vec
}

; C source: bfloat16x4_t test_vdup_n_bf16(bfloat16_t v) { return vdup_n_bf16(v); }
; The scalar %v is inserted into lane 0 and that lane is then broadcast to
; all four result lanes.
define <4 x bfloat> @test_vdup_n_bf16(bfloat %v) nounwind {
; CHECK-LABEL: test_vdup_n_bf16:
; CHECK-NEXT:    dup v0.4h, v0.h[0]
; CHECK-NEXT:    ret
entry:
  %seed = insertelement <4 x bfloat> undef, bfloat %v, i32 0
  ; An all-zero mask selects lane 0 of %seed for every output lane.
  %splat = shufflevector <4 x bfloat> %seed, <4 x bfloat> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x bfloat> %splat
}

; C source: bfloat16x8_t test_vdupq_n_bf16(bfloat16_t v) { return vdupq_n_bf16(v); }
; Eight-lane variant: %v goes into lane 0 and is broadcast to all eight lanes.
define <8 x bfloat> @test_vdupq_n_bf16(bfloat %v) nounwind {
; CHECK-LABEL: test_vdupq_n_bf16:
; CHECK-NEXT:    dup v0.8h, v0.h[0]
; CHECK-NEXT:    ret
entry:
  %seed = insertelement <8 x bfloat> undef, bfloat %v, i32 0
  ; An all-zero mask selects lane 0 of %seed for every output lane.
  %splat = shufflevector <8 x bfloat> %seed, <8 x bfloat> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x bfloat> %splat
}

; C source: bfloat16x4_t test_vdup_lane_bf16(bfloat16x4_t v) { return vdup_lane_bf16(v, 1); }
; Broadcasts lane 1 of the four-lane input to all four result lanes.
define <4 x bfloat> @test_vdup_lane_bf16(<4 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vdup_lane_bf16:
; CHECK-NEXT:    dup v0.4h, v0.h[1]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <4 x bfloat> %v, <4 x bfloat> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x bfloat> %splat
}

; C source: bfloat16x8_t test_vdupq_lane_bf16(bfloat16x4_t v) { return vdupq_lane_bf16(v, 1); }
; Broadcasts lane 1 of the four-lane input into an eight-lane result.
define <8 x bfloat> @test_vdupq_lane_bf16(<4 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vdupq_lane_bf16:
; CHECK-NEXT:    dup v0.8h, v0.h[1]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <4 x bfloat> %v, <4 x bfloat> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x bfloat> %splat
}

; C source: bfloat16x4_t test_vdup_laneq_bf16(bfloat16x8_t v) { return vdup_laneq_bf16(v, 7); }
; Broadcasts lane 7 of the eight-lane input into a four-lane result.
define <4 x bfloat> @test_vdup_laneq_bf16(<8 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vdup_laneq_bf16:
; CHECK-NEXT:    dup v0.4h, v0.h[7]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <8 x bfloat> %v, <8 x bfloat> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  ret <4 x bfloat> %splat
}

; C source: bfloat16x8_t test_vdupq_laneq_bf16(bfloat16x8_t v) { return vdupq_laneq_bf16(v, 7); }
; Broadcasts lane 7 of the eight-lane input to all eight result lanes.
define <8 x bfloat> @test_vdupq_laneq_bf16(<8 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vdupq_laneq_bf16:
; CHECK-NEXT:    dup v0.8h, v0.h[7]
; CHECK-NEXT:    ret
entry:
  %splat = shufflevector <8 x bfloat> %v, <8 x bfloat> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  ret <8 x bfloat> %splat
}

; C source: bfloat16x8_t test_vcombine_bf16(bfloat16x4_t low, bfloat16x4_t high) { return vcombine_bf16(low, high); }
; Concatenates the two four-lane inputs: %low supplies result lanes 0-3 and
; %high supplies result lanes 4-7.
define <8 x bfloat> @test_vcombine_bf16(<4 x bfloat> %low, <4 x bfloat> %high) nounwind {
; CHECK-LABEL: test_vcombine_bf16:
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
entry:
  %joined = shufflevector <4 x bfloat> %low, <4 x bfloat> %high, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x bfloat> %joined
}

; C source: bfloat16x4_t test_vget_high_bf16(bfloat16x8_t a) { return vget_high_bf16(a); }
; Extracts lanes 4-7 of the eight-lane input as a four-lane vector.
define <4 x bfloat> @test_vget_high_bf16(<8 x bfloat> %a) nounwind {
; CHECK-LABEL: test_vget_high_bf16:
; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT:    ret
entry:
  %upper = shufflevector <8 x bfloat> %a, <8 x bfloat> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ret <4 x bfloat> %upper
}

; C source: bfloat16x4_t test_vget_low_bf16(bfloat16x8_t a) { return vget_low_bf16(a); }
; Extracts lanes 0-3 of the eight-lane input. The expected output is only the
; return instruction.
define <4 x bfloat> @test_vget_low_bf16(<8 x bfloat> %a) nounwind {
; CHECK-LABEL: test_vget_low_bf16:
; CHECK-NEXT:    ret
entry:
  %lower = shufflevector <8 x bfloat> %a, <8 x bfloat> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x bfloat> %lower
}

; C source: bfloat16_t test_vget_lane_bf16(bfloat16x4_t v) { return vget_lane_bf16(v, 1); }
; Reads lane 1 of the four-lane input as a scalar bfloat.
define bfloat @test_vget_lane_bf16(<4 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vget_lane_bf16:
; CHECK-NEXT:    mov h0, v0.h[1]
; CHECK-NEXT:    ret
entry:
  %elt = extractelement <4 x bfloat> %v, i32 1
  ret bfloat %elt
}

; C source: bfloat16_t test_vgetq_lane_bf16(bfloat16x8_t v) { return vgetq_lane_bf16(v, 7); }
; Reads lane 7 of the eight-lane input as a scalar bfloat.
define bfloat @test_vgetq_lane_bf16(<8 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vgetq_lane_bf16:
; CHECK-NEXT:    mov h0, v0.h[7]
; CHECK-NEXT:    ret
entry:
  %elt = extractelement <8 x bfloat> %v, i32 7
  ret bfloat %elt
}

; C source: bfloat16x4_t test_vset_lane_bf16(bfloat16_t a, bfloat16x4_t v) { return vset_lane_bf16(a, v, 1); }
; Overwrites lane 1 of %v with the scalar %a. The expected code performs the
; insert in v1 and then copies v1 into v0.
define <4 x bfloat> @test_vset_lane_bf16(bfloat %a, <4 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vset_lane_bf16:
; CHECK-NEXT:    mov v1.h[1], v0.h[0]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
entry:
  %updated = insertelement <4 x bfloat> %v, bfloat %a, i32 1
  ret <4 x bfloat> %updated
}

; C source: bfloat16x8_t test_vsetq_lane_bf16(bfloat16_t a, bfloat16x8_t v) { return vsetq_lane_bf16(a, v, 7); }
; Overwrites lane 7 of %v with the scalar %a. The expected code performs the
; insert in v1 and then copies v1 into v0.
define <8 x bfloat> @test_vsetq_lane_bf16(bfloat %a, <8 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vsetq_lane_bf16:
; CHECK-NEXT:    mov v1.h[7], v0.h[0]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
entry:
  %updated = insertelement <8 x bfloat> %v, bfloat %a, i32 7
  ret <8 x bfloat> %updated
}

; C source: bfloat16_t test_vduph_lane_bf16(bfloat16x4_t v) { return vduph_lane_bf16(v, 1); }
; Reads lane 1 of the four-lane input as a scalar bfloat.
define bfloat @test_vduph_lane_bf16(<4 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vduph_lane_bf16:
; CHECK-NEXT:    mov h0, v0.h[1]
; CHECK-NEXT:    ret
entry:
  %elt = extractelement <4 x bfloat> %v, i32 1
  ret bfloat %elt
}

; C source: bfloat16_t test_vduph_laneq_bf16(bfloat16x8_t v) { return vduph_laneq_bf16(v, 7); }
; Reads lane 7 of the eight-lane input as a scalar bfloat.
define bfloat @test_vduph_laneq_bf16(<8 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vduph_laneq_bf16:
; CHECK-NEXT:    mov h0, v0.h[7]
; CHECK-NEXT:    ret
entry:
  %elt = extractelement <8 x bfloat> %v, i32 7
  ret bfloat %elt
}

; C source: bfloat16x4_t test_vcopy_lane_bf16_v1(bfloat16x4_t a, bfloat16x4_t b) { return vcopy_lane_bf16(a, 1, b, 3); }
; Result is %a with lane 1 replaced by lane 3 of %b (mask index 7 addresses
; lane 3 of the second shuffle operand).
define <4 x bfloat> @test_vcopy_lane_bf16_v1(<4 x bfloat> %a, <4 x bfloat> %b) nounwind {
; CHECK-LABEL: test_vcopy_lane_bf16_v1:
; CHECK-NEXT:    mov v0.h[1], v1.h[3]
; CHECK-NEXT:    ret
entry:
  %merged = shufflevector <4 x bfloat> %a, <4 x bfloat> %b, <4 x i32> <i32 0, i32 7, i32 2, i32 3>
  ret <4 x bfloat> %merged
}

; C source: bfloat16x4_t test_vcopy_lane_bf16_v2(bfloat16x4_t a, bfloat16x4_t b) { return vcopy_lane_bf16(a, 2, b, 0); }
; Result is %a with lane 2 replaced by lane 0 of %b (mask index 4 addresses
; lane 0 of the second shuffle operand).
define <4 x bfloat> @test_vcopy_lane_bf16_v2(<4 x bfloat> %a, <4 x bfloat> %b) nounwind {
; CHECK-LABEL: test_vcopy_lane_bf16_v2:
; CHECK-NEXT:    mov v0.h[2], v1.h[0]
; CHECK-NEXT:    ret
entry:
  %merged = shufflevector <4 x bfloat> %a, <4 x bfloat> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
  ret <4 x bfloat> %merged
}

; C source: bfloat16x8_t test_vcopyq_lane_bf16_v1(bfloat16x8_t a, bfloat16x4_t b) { return vcopyq_lane_bf16(a, 0, b, 2); }
; Result is %a with lane 0 replaced by lane 2 of %b.
define <8 x bfloat> @test_vcopyq_lane_bf16_v1(<8 x bfloat> %a, <4 x bfloat> %b) nounwind {
; CHECK-LABEL: test_vcopyq_lane_bf16_v1:
; CHECK-NEXT:    mov v0.h[0], v1.h[2]
; CHECK-NEXT:    ret
entry:
  ; Widen %b to eight lanes; only lane 2 is defined, and it holds lane 2 of %b.
  %b_wide = shufflevector <4 x bfloat> %b, <4 x bfloat> undef, <8 x i32> <i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; Mask index 10 addresses lane 2 of the second operand (%b_wide).
  %merged = shufflevector <8 x bfloat> %a, <8 x bfloat> %b_wide, <8 x i32> <i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x bfloat> %merged
}

; C source: bfloat16x8_t test_vcopyq_lane_bf16_v2(bfloat16x8_t a, bfloat16x4_t b) { return vcopyq_lane_bf16(a, 6, b, 0); }
; Result is %a with lane 6 replaced by lane 0 of %b.
define <8 x bfloat> @test_vcopyq_lane_bf16_v2(<8 x bfloat> %a, <4 x bfloat> %b) nounwind {
; CHECK-LABEL: test_vcopyq_lane_bf16_v2:
; CHECK-NEXT:    mov v0.h[6], v1.h[0]
; CHECK-NEXT:    ret
entry:
  ; Widen %b to eight lanes; only lane 0 is defined, and it holds lane 0 of %b.
  %b_wide = shufflevector <4 x bfloat> %b, <4 x bfloat> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; Mask index 8 addresses lane 0 of the second operand (%b_wide).
  %merged = shufflevector <8 x bfloat> %a, <8 x bfloat> %b_wide, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 7>
  ret <8 x bfloat> %merged
}

; C source: bfloat16x4_t test_vcopy_laneq_bf16_v1(bfloat16x4_t a, bfloat16x8_t b) { return vcopy_laneq_bf16(a, 0, b, 7); }
; Result is %a with lane 0 replaced by lane 7 of %b, expressed here as an
; extract followed by an insert.
define <4 x bfloat> @test_vcopy_laneq_bf16_v1(<4 x bfloat> %a, <8 x bfloat> %b) nounwind {
; CHECK-LABEL: test_vcopy_laneq_bf16_v1:
; CHECK-NEXT:    mov v0.h[0], v1.h[7]
; CHECK-NEXT:    ret
entry:
  %src_elt = extractelement <8 x bfloat> %b, i32 7
  %merged = insertelement <4 x bfloat> %a, bfloat %src_elt, i32 0
  ret <4 x bfloat> %merged
}

; C source: bfloat16x4_t test_vcopy_laneq_bf16_v2(bfloat16x4_t a, bfloat16x8_t b) { return vcopy_laneq_bf16(a, 3, b, 4); }
; Result is %a with lane 3 replaced by lane 4 of %b, expressed here as an
; extract followed by an insert.
define <4 x bfloat> @test_vcopy_laneq_bf16_v2(<4 x bfloat> %a, <8 x bfloat> %b) nounwind {
; CHECK-LABEL: test_vcopy_laneq_bf16_v2:
; CHECK-NEXT:    mov v0.h[3], v1.h[4]
; CHECK-NEXT:    ret
entry:
  %src_elt = extractelement <8 x bfloat> %b, i32 4
  %merged = insertelement <4 x bfloat> %a, bfloat %src_elt, i32 3
  ret <4 x bfloat> %merged
}

; C source: bfloat16x8_t test_vcopyq_laneq_bf16_v1(bfloat16x8_t a, bfloat16x8_t b) { return vcopyq_laneq_bf16(a, 3, b, 7); }
; Result is %a with lane 3 replaced by lane 7 of %b (mask index 15 addresses
; lane 7 of the second shuffle operand).
define <8 x bfloat> @test_vcopyq_laneq_bf16_v1(<8 x bfloat> %a, <8 x bfloat> %b) nounwind {
; CHECK-LABEL: test_vcopyq_laneq_bf16_v1:
; CHECK-NEXT:    mov v0.h[3], v1.h[7]
; CHECK-NEXT:    ret
entry:
  %merged = shufflevector <8 x bfloat> %a, <8 x bfloat> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6, i32 7>
  ret <8 x bfloat> %merged
}

; C source: bfloat16x8_t test_vcopyq_laneq_bf16_v2(bfloat16x8_t a, bfloat16x8_t b) { return vcopyq_laneq_bf16(a, 6, b, 2); }
; Result is %a with lane 6 replaced by lane 2 of %b (mask index 10 addresses
; lane 2 of the second shuffle operand).
define <8 x bfloat> @test_vcopyq_laneq_bf16_v2(<8 x bfloat> %a, <8 x bfloat> %b) nounwind {
; CHECK-LABEL: test_vcopyq_laneq_bf16_v2:
; CHECK-NEXT:    mov v0.h[6], v1.h[2]
; CHECK-NEXT:    ret
entry:
  %merged = shufflevector <8 x bfloat> %a, <8 x bfloat> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 10, i32 7>
  ret <8 x bfloat> %merged
}
