• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; This is a smoke test of nop insertion.
2
3; REQUIRES: allow_dump
4
5; Use filetype=asm because the x86 checks below currently depend on the
6; /* variant = N */ assembler comment that accompanies each nop.
7
8; RUN: %p2i -i %s --filetype=asm -a -sz-seed=1 -nop-insertion \
9; RUN:    -nop-insertion-percentage=50 -max-nops-per-instruction=1 \
10; RUN:    | FileCheck %s --check-prefix=PROB50
11; RUN: %p2i -i %s --filetype=asm -a -sz-seed=1 -nop-insertion \
12; RUN:    -nop-insertion-percentage=90 -max-nops-per-instruction=1 \
13; RUN:    | FileCheck %s --check-prefix=PROB90
14; RUN: %p2i -i %s --filetype=asm -a -sz-seed=1 -nop-insertion \
15; RUN:    -nop-insertion-percentage=50 -max-nops-per-instruction=2 \
16; RUN:    | FileCheck %s --check-prefix=MAXNOPS2
17; RUN: %p2i -i %s --filetype=asm --sandbox -a -sz-seed=1 -nop-insertion \
18; RUN:    -nop-insertion-percentage=50 -max-nops-per-instruction=1 \
19; RUN:    | FileCheck %s --check-prefix=SANDBOX50
20; RUN: %p2i -i %s --filetype=asm --sandbox --target=arm32 -a -sz-seed=1 \
21; RUN:    -nop-insertion -nop-insertion-percentage=110 \
22; RUN:    -max-nops-per-instruction=2 \
23; RUN:    | FileCheck %s --check-prefix=ARM110P2
24
25
; mul_v4i32 multiplies its two <4 x i32> arguments with a single vector mul
; and returns the product. Each FileCheck prefix below matches the code
; generated for this function under one of the command lines at the top of
; the file.
; NOTE: the closing brace of the function comes after all of the check lines,
; so the check comments below sit inside the function body.
26define internal <4 x i32> @mul_v4i32(<4 x i32> %a, <4 x i32> %b) {
27entry:
28  %res = mul <4 x i32> %a, %b
29  ret <4 x i32> %res
30
; Expected order for the run with -nop-insertion-percentage=50 and
; -max-nops-per-instruction=1 (prefix PROB50). These are plain checks rather
; than -NEXT checks, so only the relative order of the listed lines is
; verified.
31; PROB50-LABEL: mul_v4i32
32; PROB50: nop /* variant = 1 */
33; PROB50: subl $60, %esp
34; PROB50: nop /* variant = 3 */
35; PROB50: movups %xmm0, 32(%esp)
36; PROB50: movups %xmm1, 16(%esp)
37; PROB50: movups 32(%esp), %xmm0
38; PROB50: nop /* variant = 1 */
39; PROB50: pshufd $49, 32(%esp), %xmm1
40; PROB50: nop /* variant = 4 */
41; PROB50: pshufd $49, 16(%esp), %xmm2
42; PROB50: nop /* variant = 1 */
43; PROB50: pmuludq 16(%esp), %xmm0
44; PROB50: pmuludq %xmm2, %xmm1
45; PROB50: nop /* variant = 0 */
46; PROB50: shufps $136, %xmm1, %xmm0
47; PROB50: nop /* variant = 3 */
48; PROB50: pshufd $216, %xmm0, %xmm0
49; PROB50: nop /* variant = 1 */
50; PROB50: movups %xmm0, (%esp)
51; PROB50: movups (%esp), %xmm0
52; PROB50: addl $60, %esp
53; PROB50: ret
54
; Expected order for the run with -nop-insertion-percentage=90 and
; -max-nops-per-instruction=1 (prefix PROB90). Note that the last check
; expects a nop after the ret.
55; PROB90-LABEL: mul_v4i32
56; PROB90: nop /* variant = 1 */
57; PROB90: subl $60, %esp
58; PROB90: nop /* variant = 3 */
59; PROB90: movups %xmm0, 32(%esp)
60; PROB90: nop /* variant = 4 */
61; PROB90: movups %xmm1, 16(%esp)
62; PROB90: nop /* variant = 1 */
63; PROB90: movups 32(%esp), %xmm0
64; PROB90: nop /* variant = 4 */
65; PROB90: pshufd $49, 32(%esp), %xmm1
66; PROB90: nop /* variant = 1 */
67; PROB90: pshufd $49, 16(%esp), %xmm2
68; PROB90: nop /* variant = 4 */
69; PROB90: pmuludq 16(%esp), %xmm0
70; PROB90: nop /* variant = 2 */
71; PROB90: pmuludq %xmm2, %xmm1
72; PROB90: shufps $136, %xmm1, %xmm0
73; PROB90: nop /* variant = 1 */
74; PROB90: pshufd $216, %xmm0, %xmm0
75; PROB90: movups %xmm0, (%esp)
76; PROB90: nop /* variant = 1 */
77; PROB90: movups (%esp), %xmm0
78; PROB90: nop /* variant = 0 */
79; PROB90: addl $60, %esp
80; PROB90: nop /* variant = 0 */
81; PROB90: ret
82; PROB90: nop /* variant = 4 */
83
; Expected order for the run with -nop-insertion-percentage=50 and
; -max-nops-per-instruction=2 (prefix MAXNOPS2). Up to two nops appear back
; to back between instructions in this expectation.
84; MAXNOPS2-LABEL: mul_v4i32
85; MAXNOPS2: nop /* variant = 1 */
86; MAXNOPS2: nop /* variant = 3 */
87; MAXNOPS2: subl $60, %esp
88; MAXNOPS2: movups %xmm0, 32(%esp)
89; MAXNOPS2: nop /* variant = 1 */
90; MAXNOPS2: nop /* variant = 4 */
91; MAXNOPS2: movups %xmm1, 16(%esp)
92; MAXNOPS2: nop /* variant = 1 */
93; MAXNOPS2: movups 32(%esp), %xmm0
94; MAXNOPS2: nop /* variant = 0 */
95; MAXNOPS2: nop /* variant = 3 */
96; MAXNOPS2: pshufd $49, 32(%esp), %xmm1
97; MAXNOPS2: nop /* variant = 1 */
98; MAXNOPS2: pshufd $49, 16(%esp), %xmm2
99; MAXNOPS2: pmuludq 16(%esp), %xmm0
100; MAXNOPS2: pmuludq %xmm2, %xmm1
101; MAXNOPS2: nop /* variant = 0 */
102; MAXNOPS2: shufps $136, %xmm1, %xmm0
103; MAXNOPS2: nop /* variant = 0 */
104; MAXNOPS2: nop /* variant = 0 */
105; MAXNOPS2: pshufd $216, %xmm0, %xmm0
106; MAXNOPS2: nop /* variant = 1 */
107; MAXNOPS2: nop /* variant = 3 */
108; MAXNOPS2: movups %xmm0, (%esp)
109; MAXNOPS2: nop /* variant = 3 */
110; MAXNOPS2: movups (%esp), %xmm0
111; MAXNOPS2: addl $60, %esp
112; MAXNOPS2: nop /* variant = 3 */
113; MAXNOPS2: ret
114
115
; Expected order for the --sandbox run with -nop-insertion-percentage=50 and
; -max-nops-per-instruction=1 (prefix SANDBOX50). The nop/instruction
; sequence up to the addl is the same as in the first (non-sandbox, 50%)
; group; in place of ret the checks expect pop %ecx followed by a
; bundle-locked andl $-32, %ecx / jmp *%ecx pair.
116; SANDBOX50-LABEL: mul_v4i32
117; SANDBOX50: nop /* variant = 1 */
118; SANDBOX50: subl $60, %esp
119; SANDBOX50: nop /* variant = 3 */
120; SANDBOX50: movups %xmm0, 32(%esp)
121; SANDBOX50: movups %xmm1, 16(%esp)
122; SANDBOX50: movups 32(%esp), %xmm0
123; SANDBOX50: nop /* variant = 1 */
124; SANDBOX50: pshufd $49, 32(%esp), %xmm1
125; SANDBOX50: nop /* variant = 4 */
126; SANDBOX50: pshufd $49, 16(%esp), %xmm2
127; SANDBOX50: nop /* variant = 1 */
128; SANDBOX50: pmuludq 16(%esp), %xmm0
129; SANDBOX50: pmuludq %xmm2, %xmm1
130; SANDBOX50: nop /* variant = 0 */
131; SANDBOX50: shufps $136, %xmm1, %xmm0
132; SANDBOX50: nop /* variant = 3 */
133; SANDBOX50: pshufd $216, %xmm0, %xmm0
134; SANDBOX50: nop /* variant = 1 */
135; SANDBOX50: movups %xmm0, (%esp)
136; SANDBOX50: movups (%esp), %xmm0
137; SANDBOX50: addl $60, %esp
138; SANDBOX50: pop %ecx
139; SANDBOX50: .bundle_lock
140; SANDBOX50: andl $-32, %ecx
141; SANDBOX50: jmp *%ecx
142; SANDBOX50: .bundle_unlock
143
; Expected output for the --sandbox --target=arm32 run with
; -nop-insertion-percentage=110 and -max-nops-per-instruction=2 (prefix
; ARM110P2). Every line after the first is a -NEXT check, so the listed lines
; must be consecutive in the output. Two nops are expected after each
; instruction or bundle_lock/bundle_unlock group; the "def.pseudo" comment
; lines are not followed by nops.
; NOTE(review): presumably a percentage above 100 means "always insert the
; maximum number of nops" - confirm against the nop-insertion pass.
144; ARM110P2:       mul_v4i32:
145; ARM110P2-NEXT: .Lmul_v4i32$entry:
146; ARM110P2-NEXT:        .bundle_lock
147; ARM110P2-NEXT:        sub     sp, sp, #48
148; ARM110P2-NEXT:        bic     sp, sp, #3221225472
149; ARM110P2-NEXT:        .bundle_unlock
150; ARM110P2-NEXT:        nop
151; ARM110P2-NEXT:        nop
152; ARM110P2-NEXT:        add     ip, sp, #32
153; ARM110P2-NEXT:        nop
154; ARM110P2-NEXT:        nop
155; ARM110P2-NEXT:        .bundle_lock
156; ARM110P2-NEXT:        bic     ip, ip, #3221225472
157; ARM110P2-NEXT:        vst1.32 q0, [ip]
158; ARM110P2-NEXT:        .bundle_unlock
159; ARM110P2-NEXT:        nop
160; ARM110P2-NEXT:        nop
161; ARM110P2-NEXT:        # [sp, #32] = def.pseudo
162; ARM110P2-NEXT:        add     ip, sp, #16
163; ARM110P2-NEXT:        nop
164; ARM110P2-NEXT:        nop
165; ARM110P2-NEXT:        .bundle_lock
166; ARM110P2-NEXT:        bic     ip, ip, #3221225472
167; ARM110P2-NEXT:        vst1.32 q1, [ip]
168; ARM110P2-NEXT:        .bundle_unlock
169; ARM110P2-NEXT:        nop
170; ARM110P2-NEXT:        nop
171; ARM110P2-NEXT:        # [sp, #16] = def.pseudo
172; ARM110P2-NEXT:        add     ip, sp, #32
173; ARM110P2-NEXT:        nop
174; ARM110P2-NEXT:        nop
175; ARM110P2-NEXT:        .bundle_lock
176; ARM110P2-NEXT:        bic     ip, ip, #3221225472
177; ARM110P2-NEXT:        vld1.32 q0, [ip]
178; ARM110P2-NEXT:        .bundle_unlock
179; ARM110P2-NEXT:        nop
180; ARM110P2-NEXT:        nop
181; ARM110P2-NEXT:        add     ip, sp, #16
182; ARM110P2-NEXT:        nop
183; ARM110P2-NEXT:        nop
184; ARM110P2-NEXT:        .bundle_lock
185; ARM110P2-NEXT:        bic     ip, ip, #3221225472
186; ARM110P2-NEXT:        vld1.32 q1, [ip]
187; ARM110P2-NEXT:        .bundle_unlock
188; ARM110P2-NEXT:        nop
189; ARM110P2-NEXT:        nop
190; ARM110P2-NEXT:        vmul.i32        q0, q0, q1
191; ARM110P2-NEXT:        nop
192; ARM110P2-NEXT:        nop
193; ARM110P2-NEXT:        vst1.32 q0, [sp]
194; ARM110P2-NEXT:        nop
195; ARM110P2-NEXT:        nop
196; ARM110P2-NEXT:        # [sp] = def.pseudo
197; ARM110P2-NEXT:        vld1.32 q0, [sp]
198; ARM110P2-NEXT:        nop
199; ARM110P2-NEXT:        nop
200; ARM110P2-NEXT:        .bundle_lock
201; ARM110P2-NEXT:        add     sp, sp, #48
202; ARM110P2-NEXT:        bic     sp, sp, #3221225472
203; ARM110P2-NEXT:        .bundle_unlock
204; ARM110P2-NEXT:        nop
205; ARM110P2-NEXT:        nop
206; ARM110P2-NEXT:        .bundle_lock
207; ARM110P2-NEXT:        bic     lr, lr, #3221225487
208; ARM110P2-NEXT:        bx      lr
209; ARM110P2-NEXT:        .bundle_unlock
210; ARM110P2-NEXT:        nop
211; ARM110P2-NEXT:        nop
212
213}
214