• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -mtriple=x86_64-pc-linux  -mattr=+avx -interleaved-access -S | FileCheck %s
3
4; This file tests the function `llvm::lowerInterleavedLoad/Store`.
5
6define <4 x double> @load_factorf64_4(<16 x double>* %ptr) {
7; CHECK-LABEL: @load_factorf64_4(
8; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x double>* [[PTR:%.*]] to <4 x double>*
9; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP1]], i32 0
10; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x double>, <4 x double>* [[TMP2]], align 16
11; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP1]], i32 1
12; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x double>, <4 x double>* [[TMP4]], align 16
13; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP1]], i32 2
14; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x double>, <4 x double>* [[TMP6]], align 16
15; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP1]], i32 3
16; CHECK-NEXT:    [[TMP9:%.*]] = load <4 x double>, <4 x double>* [[TMP8]], align 16
17; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> [[TMP7]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
18; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> [[TMP9]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
19; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> [[TMP7]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
20; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> [[TMP9]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
21; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x double> [[TMP10]], <4 x double> [[TMP11]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
22; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <4 x double> [[TMP12]], <4 x double> [[TMP13]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
23; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <4 x double> [[TMP10]], <4 x double> [[TMP11]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
24; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <4 x double> [[TMP12]], <4 x double> [[TMP13]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
25; CHECK-NEXT:    [[ADD1:%.*]] = fadd <4 x double> [[TMP14]], [[TMP16]]
26; CHECK-NEXT:    [[ADD2:%.*]] = fadd <4 x double> [[ADD1]], [[TMP15]]
27; CHECK-NEXT:    [[ADD3:%.*]] = fadd <4 x double> [[ADD2]], [[TMP17]]
28; CHECK-NEXT:    ret <4 x double> [[ADD3]]
29;
30  %wide.vec = load <16 x double>, <16 x double>* %ptr, align 16
31  %strided.v0 = shufflevector <16 x double> %wide.vec, <16 x double> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
32  %strided.v1 = shufflevector <16 x double> %wide.vec, <16 x double> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
33  %strided.v2 = shufflevector <16 x double> %wide.vec, <16 x double> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
34  %strided.v3 = shufflevector <16 x double> %wide.vec, <16 x double> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
35  %add1 = fadd <4 x double> %strided.v0, %strided.v1
36  %add2 = fadd <4 x double> %add1, %strided.v2
37  %add3 = fadd <4 x double> %add2, %strided.v3
38  ret <4 x double> %add3
39}
40
41define <4 x i64> @load_factori64_4(<16 x i64>* %ptr) {
42; CHECK-LABEL: @load_factori64_4(
43; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i64>* [[PTR:%.*]] to <4 x i64>*
44; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <4 x i64>, <4 x i64>* [[TMP1]], i32 0
45; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 16
46; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr <4 x i64>, <4 x i64>* [[TMP1]], i32 1
47; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i64>, <4 x i64>* [[TMP4]], align 16
48; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr <4 x i64>, <4 x i64>* [[TMP1]], i32 2
49; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x i64>, <4 x i64>* [[TMP6]], align 16
50; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr <4 x i64>, <4 x i64>* [[TMP1]], i32 3
51; CHECK-NEXT:    [[TMP9:%.*]] = load <4 x i64>, <4 x i64>* [[TMP8]], align 16
52; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> [[TMP7]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
53; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> [[TMP9]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
54; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> [[TMP7]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
55; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> [[TMP9]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
56; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x i64> [[TMP10]], <4 x i64> [[TMP11]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
57; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <4 x i64> [[TMP12]], <4 x i64> [[TMP13]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
58; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <4 x i64> [[TMP10]], <4 x i64> [[TMP11]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
59; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <4 x i64> [[TMP12]], <4 x i64> [[TMP13]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
60; CHECK-NEXT:    [[ADD1:%.*]] = add <4 x i64> [[TMP14]], [[TMP16]]
61; CHECK-NEXT:    [[ADD2:%.*]] = add <4 x i64> [[ADD1]], [[TMP15]]
62; CHECK-NEXT:    [[ADD3:%.*]] = add <4 x i64> [[ADD2]], [[TMP17]]
63; CHECK-NEXT:    ret <4 x i64> [[ADD3]]
64;
65  %wide.vec = load <16 x i64>, <16 x i64>* %ptr, align 16
66  %strided.v0 = shufflevector <16 x i64> %wide.vec, <16 x i64> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
67  %strided.v1 = shufflevector <16 x i64> %wide.vec, <16 x i64> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
68  %strided.v2 = shufflevector <16 x i64> %wide.vec, <16 x i64> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
69  %strided.v3 = shufflevector <16 x i64> %wide.vec, <16 x i64> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
70  %add1 = add <4 x i64> %strided.v0, %strided.v1
71  %add2 = add <4 x i64> %add1, %strided.v2
72  %add3 = add <4 x i64> %add2, %strided.v3
73  ret <4 x i64> %add3
74}
75
76define <4 x double> @load_factorf64_1(<16 x double>* %ptr) {
77; CHECK-LABEL: @load_factorf64_1(
78; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x double>* [[PTR:%.*]] to <4 x double>*
79; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP1]], i32 0
80; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x double>, <4 x double>* [[TMP2]], align 16
81; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP1]], i32 1
82; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x double>, <4 x double>* [[TMP4]], align 16
83; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP1]], i32 2
84; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x double>, <4 x double>* [[TMP6]], align 16
85; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr <4 x double>, <4 x double>* [[TMP1]], i32 3
86; CHECK-NEXT:    [[TMP9:%.*]] = load <4 x double>, <4 x double>* [[TMP8]], align 16
87; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> [[TMP7]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
88; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> [[TMP9]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
89; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> [[TMP7]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
90; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> [[TMP9]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
91; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x double> [[TMP10]], <4 x double> [[TMP11]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
92; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <4 x double> [[TMP12]], <4 x double> [[TMP13]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
93; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <4 x double> [[TMP10]], <4 x double> [[TMP11]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
94; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <4 x double> [[TMP12]], <4 x double> [[TMP13]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
95; CHECK-NEXT:    [[MUL:%.*]] = fmul <4 x double> [[TMP14]], [[TMP14]]
96; CHECK-NEXT:    ret <4 x double> [[MUL]]
97;
98  %wide.vec = load <16 x double>, <16 x double>* %ptr, align 16
99  %strided.v0 = shufflevector <16 x double> %wide.vec, <16 x double> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
100  %strided.v3 = shufflevector <16 x double> %wide.vec, <16 x double> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
101  %mul = fmul <4 x double> %strided.v0, %strided.v3
102  ret <4 x double> %mul
103}
104
105define void @store_factorf64_4(<16 x double>* %ptr, <4 x double> %v0, <4 x double> %v1, <4 x double> %v2, <4 x double> %v3) {
106; CHECK-LABEL: @store_factorf64_4(
107; CHECK-NEXT:    [[S0:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
108; CHECK-NEXT:    [[S1:%.*]] = shufflevector <4 x double> [[V2:%.*]], <4 x double> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
109; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
110; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
111; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
112; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15>
113; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
114; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
115; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP3]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
116; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP4]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
117; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> [[TMP6]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
118; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
119; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> [[TMP6]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
120; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
121; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
122; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x double> [[TMP10]], <4 x double> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
123; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <8 x double> [[TMP13]], <8 x double> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
124; CHECK-NEXT:    store <16 x double> [[TMP15]], <16 x double>* [[PTR:%.*]], align 16
125; CHECK-NEXT:    ret void
126;
127  %s0 = shufflevector <4 x double> %v0, <4 x double> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
128  %s1 = shufflevector <4 x double> %v2, <4 x double> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
129  %interleaved.vec = shufflevector <8 x double> %s0, <8 x double> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
130  store <16 x double> %interleaved.vec, <16 x double>* %ptr, align 16
131  ret void
132}
133
134define void @store_factori64_4(<16 x i64>* %ptr, <4 x i64> %v0, <4 x i64> %v1, <4 x i64> %v2, <4 x i64> %v3) {
135; CHECK-LABEL: @store_factori64_4(
136; CHECK-NEXT:    [[S0:%.*]] = shufflevector <4 x i64> [[V0:%.*]], <4 x i64> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
137; CHECK-NEXT:    [[S1:%.*]] = shufflevector <4 x i64> [[V2:%.*]], <4 x i64> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
138; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i64> [[S0]], <8 x i64> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
139; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i64> [[S0]], <8 x i64> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
140; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i64> [[S0]], <8 x i64> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
141; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i64> [[S0]], <8 x i64> [[S1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15>
142; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
143; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
144; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP3]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
145; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> [[TMP4]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
146; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> [[TMP6]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
147; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> [[TMP8]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
148; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> [[TMP6]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
149; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> [[TMP8]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
150; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
151; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x i64> [[TMP10]], <4 x i64> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
152; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <8 x i64> [[TMP13]], <8 x i64> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
153; CHECK-NEXT:    store <16 x i64> [[TMP15]], <16 x i64>* [[PTR:%.*]], align 16
154; CHECK-NEXT:    ret void
155;
156  %s0 = shufflevector <4 x i64> %v0, <4 x i64> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
157  %s1 = shufflevector <4 x i64> %v2, <4 x i64> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
158  %interleaved.vec = shufflevector <8 x i64> %s0, <8 x i64> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
159  store <16 x i64> %interleaved.vec, <16 x i64>* %ptr, align 16
160  ret void
161}
162
163define void @store_factorf64_4_revMask(<16 x double>* %ptr, <4 x double> %v0, <4 x double> %v1, <4 x double> %v2, <4 x double> %v3) {
164; CHECK-LABEL: @store_factorf64_4_revMask(
165; CHECK-NEXT:    [[S0:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
166; CHECK-NEXT:    [[S1:%.*]] = shufflevector <4 x double> [[V2:%.*]], <4 x double> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
167; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15>
168; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
169; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
170; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
171; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
172; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
173; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP3]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
174; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP4]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
175; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> [[TMP6]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
176; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
177; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> [[TMP6]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
178; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
179; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
180; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x double> [[TMP10]], <4 x double> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
181; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <8 x double> [[TMP13]], <8 x double> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
182; CHECK-NEXT:    store <16 x double> [[TMP15]], <16 x double>* [[PTR:%.*]], align 16
183; CHECK-NEXT:    ret void
184;
185  %s0 = shufflevector <4 x double> %v0, <4 x double> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
186  %s1 = shufflevector <4 x double> %v2, <4 x double> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
187  %interleaved.vec = shufflevector <8 x double> %s0, <8 x double> %s1, <16 x i32> <i32 12, i32 8, i32 4, i32 0, i32 13, i32 9, i32 5, i32 1, i32 14, i32 10, i32 6, i32 2, i32 15, i32 11, i32 7, i32 3>
188  store <16 x double> %interleaved.vec, <16 x double>* %ptr, align 16
189  ret void
190}
191
192define void @store_factorf64_4_arbitraryMask(<16 x double>* %ptr, <16 x double> %v0, <16 x double> %v1, <16 x double> %v2, <16 x double> %v3) {
193; CHECK-LABEL: @store_factorf64_4_arbitraryMask(
194; CHECK-NEXT:    [[S0:%.*]] = shufflevector <16 x double> [[V0:%.*]], <16 x double> [[V1:%.*]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
195; CHECK-NEXT:    [[S1:%.*]] = shufflevector <16 x double> [[V2:%.*]], <16 x double> [[V3:%.*]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
196; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x double> [[S0]], <32 x double> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
197; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <32 x double> [[S0]], <32 x double> [[S1]], <4 x i32> <i32 32, i32 33, i32 34, i32 35>
198; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <32 x double> [[S0]], <32 x double> [[S1]], <4 x i32> <i32 16, i32 17, i32 18, i32 19>
199; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <32 x double> [[S0]], <32 x double> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
200; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
201; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
202; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP3]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
203; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP4]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
204; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> [[TMP6]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
205; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
206; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> [[TMP6]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
207; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
208; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
209; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x double> [[TMP10]], <4 x double> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
210; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <8 x double> [[TMP13]], <8 x double> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
211; CHECK-NEXT:    store <16 x double> [[TMP15]], <16 x double>* [[PTR:%.*]], align 16
212; CHECK-NEXT:    ret void
213;
214  %s0 = shufflevector <16 x double> %v0, <16 x double> %v1, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
215  %s1 = shufflevector <16 x double> %v2, <16 x double> %v3, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
216  %interleaved.vec = shufflevector <32 x double> %s0, <32 x double> %s1, <16 x i32> <i32 4, i32 32, i32 16, i32 8, i32 5, i32 33, i32 17, i32 9, i32 6, i32 34, i32 18, i32 10, i32 7, i32 35, i32 19, i32 11>
217  store <16 x double> %interleaved.vec, <16 x double>* %ptr, align 16
218  ret void
219}
220
221; This verifies whether the test passes and does not hit any assertions.
222; Today, X86InterleavedAccess could have handled this case and
223; generate transposed sequence by extending the current implementation
224; which would be creating dummy vectors of undef. But it decided not to
225; optimize these cases where the load-size is less than Factor * NumberOfElements.
226; Because a better sequence can easily be generated by CG.
227
228@a = local_unnamed_addr global <4 x double> zeroinitializer, align 32
229; Function Attrs: norecurse nounwind readonly uwtable
230define <4 x double> @test_unhandled(<4 x double> %b) {
231; CHECK-LABEL: @test_unhandled(
232; CHECK-NEXT:  entry:
233; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x double>, <4 x double>* @a, align 32
234; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> undef, <4 x i32> <i32 3, i32 undef, i32 undef, i32 undef>
235; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 0, i32 0>
236; CHECK-NEXT:    ret <4 x double> [[SHUFFLE]]
237;
238entry:
239  %0 = load <4 x double>, <4 x double>* @a, align 32
240  %1 = shufflevector <4 x double> %0, <4 x double> undef, <4 x i32> <i32 3, i32 undef, i32 undef, i32 undef>
241  %shuffle = shufflevector <4 x double> %1, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 0, i32 0>
242  ret <4 x double> %shuffle
243}
244