; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVXONLY
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+avx512f < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=KNL
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512bw,+avx512vl < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=SKX

; Verify that fast-isel knows how to select aligned/unaligned vector loads.
; Also verify that the selected load instruction is in the correct domain.
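;
; Summary of the check prefixes defined by the RUN lines above: SSE checks the
; +sse2 run, AVXONLY the +avx run, KNL the +avx512f run, and SKX the
; +avx512dq,+avx512bw,+avx512vl run. The shared AVX prefix covers all three AVX
; runs, and AVX512 covers both AVX-512-capable runs.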

define <16 x i8> @test_v16i8(<16 x i8>* %V) {
; SSE-LABEL: test_v16i8:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16i8:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v16i8:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v16i8:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <16 x i8>, <16 x i8>* %V, align 16
  ret <16 x i8> %0
}

define <8 x i16> @test_v8i16(<8 x i16>* %V) {
; SSE-LABEL: test_v8i16:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8i16:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v8i16:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v8i16:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <8 x i16>, <8 x i16>* %V, align 16
  ret <8 x i16> %0
}

define <4 x i32> @test_v4i32(<4 x i32>* %V) {
; SSE-LABEL: test_v4i32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v4i32:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v4i32:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v4i32:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %V, align 16
  ret <4 x i32> %0
}

define <2 x i64> @test_v2i64(<2 x i64>* %V) {
; SSE-LABEL: test_v2i64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v2i64:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v2i64:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v2i64:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %V, align 16
  ret <2 x i64> %0
}

define <16 x i8> @test_v16i8_unaligned(<16 x i8>* %V) {
; SSE-LABEL: test_v16i8_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqu (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16i8_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqu (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v16i8_unaligned:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqu (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v16i8_unaligned:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <16 x i8>, <16 x i8>* %V, align 4
  ret <16 x i8> %0
}

define <8 x i16> @test_v8i16_unaligned(<8 x i16>* %V) {
; SSE-LABEL: test_v8i16_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqu (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8i16_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqu (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v8i16_unaligned:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqu (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v8i16_unaligned:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <8 x i16>, <8 x i16>* %V, align 4
  ret <8 x i16> %0
}

define <4 x i32> @test_v4i32_unaligned(<4 x i32>* %V) {
; SSE-LABEL: test_v4i32_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqu (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v4i32_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqu (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v4i32_unaligned:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqu (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v4i32_unaligned:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %V, align 4
  ret <4 x i32> %0
}

define <2 x i64> @test_v2i64_unaligned(<2 x i64>* %V) {
; SSE-LABEL: test_v2i64_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqu (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v2i64_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqu (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v2i64_unaligned:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqu (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v2i64_unaligned:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %V, align 4
  ret <2 x i64> %0
}

define <4 x float> @test_v4f32(<4 x float>* %V) {
; SSE-LABEL: test_v4f32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x float>, <4 x float>* %V, align 16
  ret <4 x float> %0
}

define <2 x double> @test_v2f64(<2 x double>* %V) {
; SSE-LABEL: test_v2f64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movapd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovapd (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <2 x double>, <2 x double>* %V, align 16
  ret <2 x double> %0
}

define <4 x float> @test_v4f32_unaligned(<4 x float>* %V) {
; SSE-LABEL: test_v4f32_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f32_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovups (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x float>, <4 x float>* %V, align 4
  ret <4 x float> %0
}

define <2 x double> @test_v2f64_unaligned(<2 x double>* %V) {
; SSE-LABEL: test_v2f64_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movupd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2f64_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovupd (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <2 x double>, <2 x double>* %V, align 4
  ret <2 x double> %0
}

define <16 x i8> @test_v16i8_abi_alignment(<16 x i8>* %V) {
; SSE-LABEL: test_v16i8_abi_alignment:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16i8_abi_alignment:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v16i8_abi_alignment:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v16i8_abi_alignment:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <16 x i8>, <16 x i8>* %V
  ret <16 x i8> %0
}

define <8 x i16> @test_v8i16_abi_alignment(<8 x i16>* %V) {
; SSE-LABEL: test_v8i16_abi_alignment:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8i16_abi_alignment:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v8i16_abi_alignment:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v8i16_abi_alignment:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <8 x i16>, <8 x i16>* %V
  ret <8 x i16> %0
}

define <4 x i32> @test_v4i32_abi_alignment(<4 x i32>* %V) {
; SSE-LABEL: test_v4i32_abi_alignment:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v4i32_abi_alignment:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v4i32_abi_alignment:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v4i32_abi_alignment:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %V
  ret <4 x i32> %0
}

define <2 x i64> @test_v2i64_abi_alignment(<2 x i64>* %V) {
; SSE-LABEL: test_v2i64_abi_alignment:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v2i64_abi_alignment:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v2i64_abi_alignment:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v2i64_abi_alignment:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %V
  ret <2 x i64> %0
}

define <4 x float> @test_v4f32_abi_alignment(<4 x float>* %V) {
; SSE-LABEL: test_v4f32_abi_alignment:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f32_abi_alignment:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x float>, <4 x float>* %V
  ret <4 x float> %0
}

define <2 x double> @test_v2f64_abi_alignment(<2 x double>* %V) {
; SSE-LABEL: test_v2f64_abi_alignment:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movapd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2f64_abi_alignment:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovapd (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <2 x double>, <2 x double>* %V
  ret <2 x double> %0
}

define <32 x i8> @test_v32i8(<32 x i8>* %V) {
; SSE-LABEL: test_v32i8:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v32i8:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %ymm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v32i8:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v32i8:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %ymm0
; SKX-NEXT:    retq
entry:
  %0 = load <32 x i8>, <32 x i8>* %V, align 32
  ret <32 x i8> %0
}

define <16 x i16> @test_v16i16(<16 x i16>* %V) {
; SSE-LABEL: test_v16i16:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16i16:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %ymm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v16i16:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v16i16:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %ymm0
; SKX-NEXT:    retq
entry:
  %0 = load <16 x i16>, <16 x i16>* %V, align 32
  ret <16 x i16> %0
}

define <8 x i32> @test_v8i32(<8 x i32>* %V) {
; SSE-LABEL: test_v8i32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8i32:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %ymm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v8i32:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v8i32:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %ymm0
; SKX-NEXT:    retq
entry:
  %0 = load <8 x i32>, <8 x i32>* %V, align 32
  ret <8 x i32> %0
}

define <4 x i64> @test_v4i64(<4 x i64>* %V) {
; SSE-LABEL: test_v4i64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v4i64:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %ymm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v4i64:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v4i64:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %ymm0
; SKX-NEXT:    retq
entry:
  %0 = load <4 x i64>, <4 x i64>* %V, align 32
  ret <4 x i64> %0
}

define <32 x i8> @test_v32i8_unaligned(<32 x i8>* %V) {
; SSE-LABEL: test_v32i8_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v32i8_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqu (%rdi), %ymm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v32i8_unaligned:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqu (%rdi), %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v32i8_unaligned:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %ymm0
; SKX-NEXT:    retq
entry:
  %0 = load <32 x i8>, <32 x i8>* %V, align 4
  ret <32 x i8> %0
}

define <16 x i16> @test_v16i16_unaligned(<16 x i16>* %V) {
; SSE-LABEL: test_v16i16_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16i16_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqu (%rdi), %ymm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v16i16_unaligned:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqu (%rdi), %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v16i16_unaligned:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %ymm0
; SKX-NEXT:    retq
entry:
  %0 = load <16 x i16>, <16 x i16>* %V, align 4
  ret <16 x i16> %0
}

define <8 x i32> @test_v8i32_unaligned(<8 x i32>* %V) {
; SSE-LABEL: test_v8i32_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8i32_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqu (%rdi), %ymm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v8i32_unaligned:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqu (%rdi), %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v8i32_unaligned:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %ymm0
; SKX-NEXT:    retq
entry:
  %0 = load <8 x i32>, <8 x i32>* %V, align 4
  ret <8 x i32> %0
}

define <4 x i64> @test_v4i64_unaligned(<4 x i64>* %V) {
; SSE-LABEL: test_v4i64_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v4i64_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqu (%rdi), %ymm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v4i64_unaligned:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqu (%rdi), %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v4i64_unaligned:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %ymm0
; SKX-NEXT:    retq
entry:
  %0 = load <4 x i64>, <4 x i64>* %V, align 4
  ret <4 x i64> %0
}

define <8 x float> @test_v8f32(<8 x float>* %V) {
; SSE-LABEL: test_v8f32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <8 x float>, <8 x float>* %V, align 32
  ret <8 x float> %0
}

define <4 x double> @test_v4f64(<4 x double>* %V) {
; SSE-LABEL: test_v4f64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movapd (%rdi), %xmm0
; SSE-NEXT:    movapd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovapd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x double>, <4 x double>* %V, align 32
  ret <4 x double> %0
}

define <8 x float> @test_v8f32_unaligned(<8 x float>* %V) {
; SSE-LABEL: test_v8f32_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8f32_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovups (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <8 x float>, <8 x float>* %V, align 4
  ret <8 x float> %0
}

define <4 x double> @test_v4f64_unaligned(<4 x double>* %V) {
; SSE-LABEL: test_v4f64_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movupd (%rdi), %xmm0
; SSE-NEXT:    movupd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f64_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovupd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x double>, <4 x double>* %V, align 4
  ret <4 x double> %0
}

define <64 x i8> @test_v64i8(<64 x i8>* %V) {
; SSE-LABEL: test_v64i8:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    movaps 32(%rdi), %xmm2
; SSE-NEXT:    movaps 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v64i8:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovaps (%rdi), %ymm0
; AVXONLY-NEXT:    vmovaps 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v64i8:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovaps (%rdi), %ymm0
; KNL-NEXT:    vmovaps 32(%rdi), %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v64i8:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %zmm0
; SKX-NEXT:    retq
entry:
  %0 = load <64 x i8>, <64 x i8>* %V, align 64
  ret <64 x i8> %0
}

define <32 x i16> @test_v32i16(<32 x i16>* %V) {
; SSE-LABEL: test_v32i16:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    movaps 32(%rdi), %xmm2
; SSE-NEXT:    movaps 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v32i16:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovaps (%rdi), %ymm0
; AVXONLY-NEXT:    vmovaps 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v32i16:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovaps (%rdi), %ymm0
; KNL-NEXT:    vmovaps 32(%rdi), %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v32i16:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %zmm0
; SKX-NEXT:    retq
entry:
  %0 = load <32 x i16>, <32 x i16>* %V, align 64
  ret <32 x i16> %0
}

define <16 x i32> @test_v16i32(<16 x i32>* %V) {
; SSE-LABEL: test_v16i32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    movaps 32(%rdi), %xmm2
; SSE-NEXT:    movaps 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16i32:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovaps (%rdi), %ymm0
; AVXONLY-NEXT:    vmovaps 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v16i32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <16 x i32>, <16 x i32>* %V, align 64
  ret <16 x i32> %0
}

define <8 x i64> @test_v8i64(<8 x i64>* %V) {
; SSE-LABEL: test_v8i64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    movaps 32(%rdi), %xmm2
; SSE-NEXT:    movaps 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8i64:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovaps (%rdi), %ymm0
; AVXONLY-NEXT:    vmovaps 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v8i64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <8 x i64>, <8 x i64>* %V, align 64
  ret <8 x i64> %0
}

define <64 x i8> @test_v64i8_unaligned(<64 x i8>* %V) {
; SSE-LABEL: test_v64i8_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    movups 32(%rdi), %xmm2
; SSE-NEXT:    movups 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v64i8_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v64i8_unaligned:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovups (%rdi), %ymm0
; KNL-NEXT:    vmovups 32(%rdi), %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v64i8_unaligned:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %zmm0
; SKX-NEXT:    retq
entry:
  %0 = load <64 x i8>, <64 x i8>* %V, align 4
  ret <64 x i8> %0
}

define <32 x i16> @test_v32i16_unaligned(<32 x i16>* %V) {
; SSE-LABEL: test_v32i16_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    movups 32(%rdi), %xmm2
; SSE-NEXT:    movups 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v32i16_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v32i16_unaligned:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovups (%rdi), %ymm0
; KNL-NEXT:    vmovups 32(%rdi), %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v32i16_unaligned:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %zmm0
; SKX-NEXT:    retq
entry:
  %0 = load <32 x i16>, <32 x i16>* %V, align 4
  ret <32 x i16> %0
}

define <16 x i32> @test_v16i32_unaligned(<16 x i32>* %V) {
; SSE-LABEL: test_v16i32_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    movups 32(%rdi), %xmm2
; SSE-NEXT:    movups 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16i32_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v16i32_unaligned:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovdqu64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <16 x i32>, <16 x i32>* %V, align 4
  ret <16 x i32> %0
}

define <8 x i64> @test_v8i64_unaligned(<8 x i64>* %V) {
; SSE-LABEL: test_v8i64_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    movups 32(%rdi), %xmm2
; SSE-NEXT:    movups 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8i64_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v8i64_unaligned:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovdqu64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <8 x i64>, <8 x i64>* %V, align 4
  ret <8 x i64> %0
}

define <8 x float> @test_v16f32(<8 x float>* %V) {
; SSE-LABEL: test_v16f32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v16f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <8 x float>, <8 x float>* %V, align 64
  ret <8 x float> %0
}

define <8 x double> @test_v8f64(<8 x double>* %V) {
; SSE-LABEL: test_v8f64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movapd (%rdi), %xmm0
; SSE-NEXT:    movapd 16(%rdi), %xmm1
; SSE-NEXT:    movapd 32(%rdi), %xmm2
; SSE-NEXT:    movapd 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8f64:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovapd (%rdi), %ymm0
; AVXONLY-NEXT:    vmovapd 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v8f64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovapd (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <8 x double>, <8 x double>* %V, align 64
  ret <8 x double> %0
}

define <16 x float> @test_v16f32_unaligned(<16 x float>* %V) {
; SSE-LABEL: test_v16f32_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    movups 32(%rdi), %xmm2
; SSE-NEXT:    movups 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16f32_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_unaligned:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovups (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <16 x float>, <16 x float>* %V, align 4
  ret <16 x float> %0
}

define <8 x double> @test_v8f64_unaligned(<8 x double>* %V) {
; SSE-LABEL: test_v8f64_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movupd (%rdi), %xmm0
; SSE-NEXT:    movupd 16(%rdi), %xmm1
; SSE-NEXT:    movupd 32(%rdi), %xmm2
; SSE-NEXT:    movupd 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8f64_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovupd (%rdi), %ymm0
; AVXONLY-NEXT:    vmovupd 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_unaligned:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovupd (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <8 x double>, <8 x double>* %V, align 4
  ret <8 x double> %0
}