; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefixes=AVX,AVXONLY
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+avx512f < %s | FileCheck %s --check-prefixes=AVX,AVX512
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512bw,+avx512vl < %s | FileCheck %s --check-prefixes=AVX,AVX512

; Verify that fast-isel knows how to select aligned/unaligned vector loads.
; Also verify that the selected load instruction is in the correct domain.
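; For example, integer vector loads should stay in the integer domain
; (movdqa/movdqu and their VEX/EVEX forms), while <n x float> and <n x double>
; loads should use the FP-domain forms (movaps/movups and movapd/movupd).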

define <16 x i8> @test_v16i8(<16 x i8>* %V) {
; SSE-LABEL: test_v16i8:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v16i8:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqa (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <16 x i8>, <16 x i8>* %V, align 16
  ret <16 x i8> %0
}

define <8 x i16> @test_v8i16(<8 x i16>* %V) {
; SSE-LABEL: test_v8i16:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8i16:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqa (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <8 x i16>, <8 x i16>* %V, align 16
  ret <8 x i16> %0
}

define <4 x i32> @test_v4i32(<4 x i32>* %V) {
; SSE-LABEL: test_v4i32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4i32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqa (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %V, align 16
  ret <4 x i32> %0
}

define <2 x i64> @test_v2i64(<2 x i64>* %V) {
; SSE-LABEL: test_v2i64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2i64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqa (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %V, align 16
  ret <2 x i64> %0
}

define <16 x i8> @test_v16i8_unaligned(<16 x i8>* %V) {
; SSE-LABEL: test_v16i8_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqu (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v16i8_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqu (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <16 x i8>, <16 x i8>* %V, align 4
  ret <16 x i8> %0
}

define <8 x i16> @test_v8i16_unaligned(<8 x i16>* %V) {
; SSE-LABEL: test_v8i16_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqu (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8i16_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqu (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <8 x i16>, <8 x i16>* %V, align 4
  ret <8 x i16> %0
}

define <4 x i32> @test_v4i32_unaligned(<4 x i32>* %V) {
; SSE-LABEL: test_v4i32_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqu (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4i32_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqu (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %V, align 4
  ret <4 x i32> %0
}

define <2 x i64> @test_v2i64_unaligned(<2 x i64>* %V) {
; SSE-LABEL: test_v2i64_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqu (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2i64_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqu (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %V, align 4
  ret <2 x i64> %0
}

define <4 x float> @test_v4f32(<4 x float>* %V) {
; SSE-LABEL: test_v4f32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x float>, <4 x float>* %V, align 16
  ret <4 x float> %0
}

define <2 x double> @test_v2f64(<2 x double>* %V) {
; SSE-LABEL: test_v2f64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movapd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovapd (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <2 x double>, <2 x double>* %V, align 16
  ret <2 x double> %0
}

define <4 x float> @test_v4f32_unaligned(<4 x float>* %V) {
; SSE-LABEL: test_v4f32_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f32_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovups (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x float>, <4 x float>* %V, align 4
  ret <4 x float> %0
}

define <2 x double> @test_v2f64_unaligned(<2 x double>* %V) {
; SSE-LABEL: test_v2f64_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movupd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2f64_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovupd (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <2 x double>, <2 x double>* %V, align 4
  ret <2 x double> %0
}

define <16 x i8> @test_v16i8_abi_alignment(<16 x i8>* %V) {
; SSE-LABEL: test_v16i8_abi_alignment:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v16i8_abi_alignment:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqa (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <16 x i8>, <16 x i8>* %V
  ret <16 x i8> %0
}

define <8 x i16> @test_v8i16_abi_alignment(<8 x i16>* %V) {
; SSE-LABEL: test_v8i16_abi_alignment:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8i16_abi_alignment:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqa (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <8 x i16>, <8 x i16>* %V
  ret <8 x i16> %0
}

define <4 x i32> @test_v4i32_abi_alignment(<4 x i32>* %V) {
; SSE-LABEL: test_v4i32_abi_alignment:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4i32_abi_alignment:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqa (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %V
  ret <4 x i32> %0
}

define <2 x i64> @test_v2i64_abi_alignment(<2 x i64>* %V) {
; SSE-LABEL: test_v2i64_abi_alignment:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2i64_abi_alignment:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqa (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %V
  ret <2 x i64> %0
}

define <4 x float> @test_v4f32_abi_alignment(<4 x float>* %V) {
; SSE-LABEL: test_v4f32_abi_alignment:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f32_abi_alignment:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x float>, <4 x float>* %V
  ret <4 x float> %0
}

define <2 x double> @test_v2f64_abi_alignment(<2 x double>* %V) {
; SSE-LABEL: test_v2f64_abi_alignment:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movapd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2f64_abi_alignment:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovapd (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <2 x double>, <2 x double>* %V
  ret <2 x double> %0
}

define <32 x i8> @test_v32i8(<32 x i8>* %V) {
; SSE-LABEL: test_v32i8:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v32i8:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqa (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <32 x i8>, <32 x i8>* %V, align 32
  ret <32 x i8> %0
}

define <16 x i16> @test_v16i16(<16 x i16>* %V) {
; SSE-LABEL: test_v16i16:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v16i16:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqa (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <16 x i16>, <16 x i16>* %V, align 32
  ret <16 x i16> %0
}

define <8 x i32> @test_v8i32(<8 x i32>* %V) {
; SSE-LABEL: test_v8i32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8i32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqa (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <8 x i32>, <8 x i32>* %V, align 32
  ret <8 x i32> %0
}

define <4 x i64> @test_v4i64(<4 x i64>* %V) {
; SSE-LABEL: test_v4i64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4i64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqa (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x i64>, <4 x i64>* %V, align 32
  ret <4 x i64> %0
}

define <32 x i8> @test_v32i8_unaligned(<32 x i8>* %V) {
; SSE-LABEL: test_v32i8_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v32i8_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqu (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <32 x i8>, <32 x i8>* %V, align 4
  ret <32 x i8> %0
}

define <16 x i16> @test_v16i16_unaligned(<16 x i16>* %V) {
; SSE-LABEL: test_v16i16_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v16i16_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqu (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <16 x i16>, <16 x i16>* %V, align 4
  ret <16 x i16> %0
}

define <8 x i32> @test_v8i32_unaligned(<8 x i32>* %V) {
; SSE-LABEL: test_v8i32_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8i32_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqu (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <8 x i32>, <8 x i32>* %V, align 4
  ret <8 x i32> %0
}

define <4 x i64> @test_v4i64_unaligned(<4 x i64>* %V) {
; SSE-LABEL: test_v4i64_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4i64_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqu (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x i64>, <4 x i64>* %V, align 4
  ret <4 x i64> %0
}

define <8 x float> @test_v8f32(<8 x float>* %V) {
; SSE-LABEL: test_v8f32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <8 x float>, <8 x float>* %V, align 32
  ret <8 x float> %0
}

define <4 x double> @test_v4f64(<4 x double>* %V) {
; SSE-LABEL: test_v4f64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movapd (%rdi), %xmm0
; SSE-NEXT:    movapd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovapd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x double>, <4 x double>* %V, align 32
  ret <4 x double> %0
}

define <8 x float> @test_v8f32_unaligned(<8 x float>* %V) {
; SSE-LABEL: test_v8f32_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8f32_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovups (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <8 x float>, <8 x float>* %V, align 4
  ret <8 x float> %0
}

define <4 x double> @test_v4f64_unaligned(<4 x double>* %V) {
; SSE-LABEL: test_v4f64_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movupd (%rdi), %xmm0
; SSE-NEXT:    movupd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f64_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovupd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x double>, <4 x double>* %V, align 4
  ret <4 x double> %0
}

define <64 x i8> @test_v64i8(<64 x i8>* %V) {
; SSE-LABEL: test_v64i8:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    movaps 32(%rdi), %xmm2
; SSE-NEXT:    movaps 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v64i8:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovaps (%rdi), %ymm0
; AVXONLY-NEXT:    vmovaps 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v64i8:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <64 x i8>, <64 x i8>* %V, align 64
  ret <64 x i8> %0
}

define <32 x i16> @test_v32i16(<32 x i16>* %V) {
; SSE-LABEL: test_v32i16:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    movaps 32(%rdi), %xmm2
; SSE-NEXT:    movaps 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v32i16:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovaps (%rdi), %ymm0
; AVXONLY-NEXT:    vmovaps 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v32i16:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <32 x i16>, <32 x i16>* %V, align 64
  ret <32 x i16> %0
}

define <16 x i32> @test_v16i32(<16 x i32>* %V) {
; SSE-LABEL: test_v16i32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    movaps 32(%rdi), %xmm2
; SSE-NEXT:    movaps 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16i32:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovaps (%rdi), %ymm0
; AVXONLY-NEXT:    vmovaps 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v16i32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <16 x i32>, <16 x i32>* %V, align 64
  ret <16 x i32> %0
}

define <8 x i64> @test_v8i64(<8 x i64>* %V) {
; SSE-LABEL: test_v8i64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    movaps 32(%rdi), %xmm2
; SSE-NEXT:    movaps 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8i64:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovaps (%rdi), %ymm0
; AVXONLY-NEXT:    vmovaps 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v8i64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <8 x i64>, <8 x i64>* %V, align 64
  ret <8 x i64> %0
}

define <64 x i8> @test_v64i8_unaligned(<64 x i8>* %V) {
; SSE-LABEL: test_v64i8_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    movups 32(%rdi), %xmm2
; SSE-NEXT:    movups 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v64i8_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v64i8_unaligned:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovdqu64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <64 x i8>, <64 x i8>* %V, align 4
  ret <64 x i8> %0
}

define <32 x i16> @test_v32i16_unaligned(<32 x i16>* %V) {
; SSE-LABEL: test_v32i16_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    movups 32(%rdi), %xmm2
; SSE-NEXT:    movups 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v32i16_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v32i16_unaligned:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovdqu64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <32 x i16>, <32 x i16>* %V, align 4
  ret <32 x i16> %0
}

define <16 x i32> @test_v16i32_unaligned(<16 x i32>* %V) {
; SSE-LABEL: test_v16i32_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    movups 32(%rdi), %xmm2
; SSE-NEXT:    movups 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16i32_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v16i32_unaligned:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovdqu64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <16 x i32>, <16 x i32>* %V, align 4
  ret <16 x i32> %0
}

define <8 x i64> @test_v8i64_unaligned(<8 x i64>* %V) {
; SSE-LABEL: test_v8i64_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    movups 32(%rdi), %xmm2
; SSE-NEXT:    movups 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8i64_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v8i64_unaligned:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovdqu64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <8 x i64>, <8 x i64>* %V, align 4
  ret <8 x i64> %0
}

define <8 x float> @test_v16f32(<8 x float>* %V) {
; SSE-LABEL: test_v16f32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v16f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <8 x float>, <8 x float>* %V, align 64
  ret <8 x float> %0
}

define <8 x double> @test_v8f64(<8 x double>* %V) {
; SSE-LABEL: test_v8f64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movapd (%rdi), %xmm0
; SSE-NEXT:    movapd 16(%rdi), %xmm1
; SSE-NEXT:    movapd 32(%rdi), %xmm2
; SSE-NEXT:    movapd 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8f64:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovapd (%rdi), %ymm0
; AVXONLY-NEXT:    vmovapd 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v8f64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovapd (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <8 x double>, <8 x double>* %V, align 64
  ret <8 x double> %0
}

define <16 x float> @test_v16f32_unaligned(<16 x float>* %V) {
; SSE-LABEL: test_v16f32_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    movups 32(%rdi), %xmm2
; SSE-NEXT:    movups 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16f32_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_unaligned:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovups (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <16 x float>, <16 x float>* %V, align 4
  ret <16 x float> %0
}

define <8 x double> @test_v8f64_unaligned(<8 x double>* %V) {
; SSE-LABEL: test_v8f64_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movupd (%rdi), %xmm0
; SSE-NEXT:    movupd 16(%rdi), %xmm1
; SSE-NEXT:    movupd 32(%rdi), %xmm2
; SSE-NEXT:    movupd 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8f64_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovupd (%rdi), %ymm0
; AVXONLY-NEXT:    vmovupd 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_unaligned:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovupd (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <8 x double>, <8 x double>* %V, align 4
  ret <8 x double> %0
}