; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s

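; Unmasked load/store of a full 512-bit byte vector: with AVX512BW, an
; unaligned <64 x i8> load or store is expected to lower to a single
; vmovups of a zmm register.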
define <64 x i8> @test1(i8* %addr) {
; CHECK-LABEL: test1:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %zmm0
; CHECK-NEXT:    retq
  %vaddr = bitcast i8* %addr to <64 x i8>*
  %res = load <64 x i8>, <64 x i8>* %vaddr, align 1
  ret <64 x i8> %res
}

define void @test2(i8* %addr, <64 x i8> %data) {
; CHECK-LABEL: test2:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %zmm0, (%rdi)
; CHECK-NEXT:    retq
  %vaddr = bitcast i8* %addr to <64 x i8>*
  store <64 x i8> %data, <64 x i8>* %vaddr, align 1
  ret void
}

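; Merge-masked load: vptestmb turns the "icmp ne ..., zeroinitializer" into a
; k-register mask, and the select of the loaded value against %old becomes a
; merge-masked vmovdqu8 into the passthru register.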
define <64 x i8> @test3(i8* %addr, <64 x i8> %old, <64 x i8> %mask1) {
; CHECK-LABEL: test3:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmb %zmm1, %zmm1, %k1
; CHECK-NEXT:    vmovdqu8 (%rdi), %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <64 x i8> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <64 x i8>*
  %r = load <64 x i8>, <64 x i8>* %vaddr, align 1
  %res = select <64 x i1> %mask, <64 x i8> %r, <64 x i8> %old
  ret <64 x i8> %res
}

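; Zero-masked load: selecting against zeroinitializer instead of a passthru
; value uses the {z} (zeroing) form of vmovdqu8.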
define <64 x i8> @test4(i8* %addr, <64 x i8> %mask1) {
; CHECK-LABEL: test4:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmb %zmm0, %zmm0, %k1
; CHECK-NEXT:    vmovdqu8 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <64 x i8> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <64 x i8>*
  %r = load <64 x i8>, <64 x i8>* %vaddr, align 1
  %res = select <64 x i1> %mask, <64 x i8> %r, <64 x i8> zeroinitializer
  ret <64 x i8> %res
}

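; The same four patterns at word granularity: <32 x i16> vectors use vptestmw
; and vmovdqu16 in place of the byte-element instructions.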
define <32 x i16> @test5(i8* %addr) {
; CHECK-LABEL: test5:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %zmm0
; CHECK-NEXT:    retq
  %vaddr = bitcast i8* %addr to <32 x i16>*
  %res = load <32 x i16>, <32 x i16>* %vaddr, align 1
  ret <32 x i16> %res
}

define void @test6(i8* %addr, <32 x i16> %data) {
; CHECK-LABEL: test6:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %zmm0, (%rdi)
; CHECK-NEXT:    retq
  %vaddr = bitcast i8* %addr to <32 x i16>*
  store <32 x i16> %data, <32 x i16>* %vaddr, align 1
  ret void
}

define <32 x i16> @test7(i8* %addr, <32 x i16> %old, <32 x i16> %mask1) {
; CHECK-LABEL: test7:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vmovdqu16 (%rdi), %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <32 x i16> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <32 x i16>*
  %r = load <32 x i16>, <32 x i16>* %vaddr, align 1
  %res = select <32 x i1> %mask, <32 x i16> %r, <32 x i16> %old
  ret <32 x i16> %res
}

define <32 x i16> @test8(i8* %addr, <32 x i16> %mask1) {
; CHECK-LABEL: test8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmw %zmm0, %zmm0, %k1
; CHECK-NEXT:    vmovdqu16 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <32 x i16> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <32 x i16>*
  %r = load <32 x i16>, <32 x i16>* %vaddr, align 1
  %res = select <32 x i1> %mask, <32 x i16> %r, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}

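; llvm.masked.load tests for vectors narrower than 512 bits. Without AVX512VL,
; masked byte/word moves only exist at zmm width, so the operation is widened:
; the <16 x i1> mask arrives as one byte per element in xmm0, vpsllw $7 moves
; each mask bit into its byte's sign bit, vpmovb2m extracts the sign bits into
; a k-register, and kmovw keeps only the low 16 mask bits. The "kill" comments
; reflect the narrow result living in the low part of a zmm register.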
define <16 x i8> @test_mask_load_16xi8(<16 x i1> %mask, <16 x i8>* %addr, <16 x i8> %val) {
; CHECK-LABEL: test_mask_load_16xi8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT:    vpmovb2m %zmm0, %k0
; CHECK-NEXT:    kmovw %k0, %k1
; CHECK-NEXT:    vmovdqu8 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i8> @llvm.masked.load.v16i8(<16 x i8>* %addr, i32 4, <16 x i1> %mask, <16 x i8> undef)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.masked.load.v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>)

define <32 x i8> @test_mask_load_32xi8(<32 x i1> %mask, <32 x i8>* %addr, <32 x i8> %val) {
; CHECK-LABEL: test_mask_load_32xi8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsllw $7, %ymm0, %ymm0
; CHECK-NEXT:    vpmovb2m %zmm0, %k0
; CHECK-NEXT:    kmovd %k0, %k1
; CHECK-NEXT:    vmovdqu8 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0
; CHECK-NEXT:    retq
  %res = call <32 x i8> @llvm.masked.load.v32i8(<32 x i8>* %addr, i32 4, <32 x i1> %mask, <32 x i8> zeroinitializer)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.masked.load.v32i8(<32 x i8>*, i32, <32 x i1>, <32 x i8>)

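; For an <8 x i1> mask, vpmovw2m produces a 32-bit mask (32 words per zmm), so
; the kshiftld/kshiftrd pair clears every mask bit above bit 7 before the
; masked load.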
define <8 x i16> @test_mask_load_8xi16(<8 x i1> %mask, <8 x i16>* %addr, <8 x i16> %val) {
; CHECK-LABEL: test_mask_load_8xi16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsllw $15, %xmm0, %xmm0
; CHECK-NEXT:    vpmovw2m %zmm0, %k0
; CHECK-NEXT:    kshiftld $24, %k0, %k0
; CHECK-NEXT:    kshiftrd $24, %k0, %k1
; CHECK-NEXT:    vmovdqu16 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i16> @llvm.masked.load.v8i16(<8 x i16>* %addr, i32 4, <8 x i1> %mask, <8 x i16> undef)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.masked.load.v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>)

define <16 x i16> @test_mask_load_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 x i16> %val) {
; CHECK-LABEL: test_mask_load_16xi16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT:    vpmovb2m %zmm0, %k0
; CHECK-NEXT:    kmovw %k0, %k1
; CHECK-NEXT:    vmovdqu16 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i16> @llvm.masked.load.v16i16(<16 x i16>* %addr, i32 4, <16 x i1> %mask, <16 x i16> zeroinitializer)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.masked.load.v16i16(<16 x i16>*, i32, <16 x i1>, <16 x i16>)

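; llvm.masked.store counterparts: the value to store is implicitly widened to
; a zmm register (the "kill" comments), the mask is materialized the same way
; as for the loads, and only the selected bytes/words are written to memory.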
define void @test_mask_store_16xi8(<16 x i1> %mask, <16 x i8>* %addr, <16 x i8> %val) {
; CHECK-LABEL: test_mask_store_16xi8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT:    vpmovb2m %zmm0, %k0
; CHECK-NEXT:    kmovw %k0, %k1
; CHECK-NEXT:    vmovdqu8 %zmm1, (%rdi) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v16i8(<16 x i8> %val, <16 x i8>* %addr, i32 4, <16 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>)

define void @test_mask_store_32xi8(<32 x i1> %mask, <32 x i8>* %addr, <32 x i8> %val) {
; CHECK-LABEL: test_mask_store_32xi8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
; CHECK-NEXT:    vpsllw $7, %ymm0, %ymm0
; CHECK-NEXT:    vpmovb2m %zmm0, %k0
; CHECK-NEXT:    kmovd %k0, %k1
; CHECK-NEXT:    vmovdqu8 %zmm1, (%rdi) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v32i8(<32 x i8> %val, <32 x i8>* %addr, i32 4, <32 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v32i8(<32 x i8>, <32 x i8>*, i32, <32 x i1>)

define void @test_mask_store_8xi16(<8 x i1> %mask, <8 x i16>* %addr, <8 x i16> %val) {
; CHECK-LABEL: test_mask_store_8xi16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
; CHECK-NEXT:    vpsllw $15, %xmm0, %xmm0
; CHECK-NEXT:    vpmovw2m %zmm0, %k0
; CHECK-NEXT:    kshiftld $24, %k0, %k0
; CHECK-NEXT:    kshiftrd $24, %k0, %k1
; CHECK-NEXT:    vmovdqu16 %zmm1, (%rdi) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v8i16(<8 x i16> %val, <8 x i16>* %addr, i32 4, <8 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>)

define void @test_mask_store_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 x i16> %val) {
; CHECK-LABEL: test_mask_store_16xi16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT:    vpmovb2m %zmm0, %k0
; CHECK-NEXT:    kmovw %k0, %k1
; CHECK-NEXT:    vmovdqu16 %zmm1, (%rdi) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v16i16(<16 x i16> %val, <16 x i16>* %addr, i32 4, <16 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v16i16(<16 x i16>, <16 x i16>*, i32, <16 x i1>)