; This is a smoke test of nop insertion.
;
; A single vector-multiply function is translated five times with different
; nop-insertion settings. Every run passes -sz-seed=1, and each run is checked
; against its own FileCheck prefix, so the nop positions and variant numbers
; expected below are tied to that seed and to the option values of that run.
; Changing the IR, the seed, or the options of a run is expected to require
; regenerating the matching group of check lines.

; REQUIRES: allow_dump

; Use filetype=asm because this currently depends on the /* variant */
; assembler comment.

; Run 1 (prefix PROB50): 50% insertion percentage, at most 1 nop per
; instruction, default target.
; RUN: %p2i -i %s --filetype=asm -a -sz-seed=1 -nop-insertion \
; RUN:    -nop-insertion-percentage=50 -max-nops-per-instruction=1 \
; RUN:    | FileCheck %s --check-prefix=PROB50

; Run 2 (prefix PROB90): 90% insertion percentage, at most 1 nop per
; instruction, default target.
; RUN: %p2i -i %s --filetype=asm -a -sz-seed=1 -nop-insertion \
; RUN:    -nop-insertion-percentage=90 -max-nops-per-instruction=1 \
; RUN:    | FileCheck %s --check-prefix=PROB90

; Run 3 (prefix MAXNOPS2): 50% insertion percentage, at most 2 nops per
; instruction, default target.
; RUN: %p2i -i %s --filetype=asm -a -sz-seed=1 -nop-insertion \
; RUN:    -nop-insertion-percentage=50 -max-nops-per-instruction=2 \
; RUN:    | FileCheck %s --check-prefix=MAXNOPS2

; Run 4 (prefix SANDBOX50): same settings as run 1, plus --sandbox.
; RUN: %p2i -i %s --filetype=asm --sandbox -a -sz-seed=1 -nop-insertion \
; RUN:    -nop-insertion-percentage=50 -max-nops-per-instruction=1 \
; RUN:    | FileCheck %s --check-prefix=SANDBOX50

; Run 5 (prefix ARM110P2): sandboxed arm32 target, insertion percentage 110
; (deliberately above 100), at most 2 nops per instruction.
; RUN: %p2i -i %s --filetype=asm --sandbox --target=arm32 -a -sz-seed=1 \
; RUN:    -nop-insertion -nop-insertion-percentage=110 \
; RUN:    -max-nops-per-instruction=2 \
; RUN:    | FileCheck %s --check-prefix=ARM110P2


; Element-wise multiply of two <4 x i32> vectors; returns the product vector.
; The check lines for all five runs live inside the function body, before the
; closing brace. The block label "entry" must stay as is: the arm32 checks
; expect the emitted label .Lmul_v4i32$entry.
define internal <4 x i32> @mul_v4i32(<4 x i32> %a, <4 x i32> %b) {
entry:
  %res = mul <4 x i32> %a, %b
  ret <4 x i32> %res

; Run 1 expectations. These are plain (unordered-gap) checks: other output
; lines may appear between two consecutive matches.
; PROB50-LABEL: mul_v4i32
; PROB50: nop /* variant = 1 */
; PROB50: subl $60, %esp
; PROB50: nop /* variant = 3 */
; PROB50: movups %xmm0, 32(%esp)
; PROB50: movups %xmm1, 16(%esp)
; PROB50: movups 32(%esp), %xmm0
; PROB50: nop /* variant = 1 */
; PROB50: pshufd $49, 32(%esp), %xmm1
; PROB50: nop /* variant = 4 */
; PROB50: pshufd $49, 16(%esp), %xmm2
; PROB50: nop /* variant = 1 */
; PROB50: pmuludq 16(%esp), %xmm0
; PROB50: pmuludq %xmm2, %xmm1
; PROB50: nop /* variant = 0 */
; PROB50: shufps $136, %xmm1, %xmm0
; PROB50: nop /* variant = 3 */
; PROB50: pshufd $216, %xmm0, %xmm0
; PROB50: nop /* variant = 1 */
; PROB50: movups %xmm0, (%esp)
; PROB50: movups (%esp), %xmm0
; PROB50: addl $60, %esp
; PROB50: ret

; Run 2 expectations: same instruction stream as run 1, with nops expected
; around most instructions.
; PROB90-LABEL: mul_v4i32
; PROB90: nop /* variant = 1 */
; PROB90: subl $60, %esp
; PROB90: nop /* variant = 3 */
; PROB90: movups %xmm0, 32(%esp)
; PROB90: nop /* variant = 4 */
; PROB90: movups %xmm1, 16(%esp)
; PROB90: nop /* variant = 1 */
; PROB90: movups 32(%esp), %xmm0
; PROB90: nop /* variant = 4 */
; PROB90: pshufd $49, 32(%esp), %xmm1
; PROB90: nop /* variant = 1 */
; PROB90: pshufd $49, 16(%esp), %xmm2
; PROB90: nop /* variant = 4 */
; PROB90: pmuludq 16(%esp), %xmm0
; PROB90: nop /* variant = 2 */
; PROB90: pmuludq %xmm2, %xmm1
; PROB90: shufps $136, %xmm1, %xmm0
; PROB90: nop /* variant = 1 */
; PROB90: pshufd $216, %xmm0, %xmm0
; PROB90: movups %xmm0, (%esp)
; PROB90: nop /* variant = 1 */
; PROB90: movups (%esp), %xmm0
; PROB90: nop /* variant = 0 */
; PROB90: addl $60, %esp
; PROB90: nop /* variant = 0 */
; PROB90: ret
; PROB90: nop /* variant = 4 */

; Run 3 expectations: with the per-instruction limit raised to 2, some
; insertion points are expected to receive two consecutive nops.
; MAXNOPS2-LABEL: mul_v4i32
; MAXNOPS2: nop /* variant = 1 */
; MAXNOPS2: nop /* variant = 3 */
; MAXNOPS2: subl $60, %esp
; MAXNOPS2: movups %xmm0, 32(%esp)
; MAXNOPS2: nop /* variant = 1 */
; MAXNOPS2: nop /* variant = 4 */
; MAXNOPS2: movups %xmm1, 16(%esp)
; MAXNOPS2: nop /* variant = 1 */
; MAXNOPS2: movups 32(%esp), %xmm0
; MAXNOPS2: nop /* variant = 0 */
; MAXNOPS2: nop /* variant = 3 */
; MAXNOPS2: pshufd $49, 32(%esp), %xmm1
; MAXNOPS2: nop /* variant = 1 */
; MAXNOPS2: pshufd $49, 16(%esp), %xmm2
; MAXNOPS2: pmuludq 16(%esp), %xmm0
; MAXNOPS2: pmuludq %xmm2, %xmm1
; MAXNOPS2: nop /* variant = 0 */
; MAXNOPS2: shufps $136, %xmm1, %xmm0
; MAXNOPS2: nop /* variant = 0 */
; MAXNOPS2: nop /* variant = 0 */
; MAXNOPS2: pshufd $216, %xmm0, %xmm0
; MAXNOPS2: nop /* variant = 1 */
; MAXNOPS2: nop /* variant = 3 */
; MAXNOPS2: movups %xmm0, (%esp)
; MAXNOPS2: nop /* variant = 3 */
; MAXNOPS2: movups (%esp), %xmm0
; MAXNOPS2: addl $60, %esp
; MAXNOPS2: nop /* variant = 3 */
; MAXNOPS2: ret


; Run 4 expectations: identical to run 1 up to the stack restore; the final
; ret is instead expected as a pop / masked indirect jump inside a
; .bundle_lock / .bundle_unlock pair.
; SANDBOX50-LABEL: mul_v4i32
; SANDBOX50: nop /* variant = 1 */
; SANDBOX50: subl $60, %esp
; SANDBOX50: nop /* variant = 3 */
; SANDBOX50: movups %xmm0, 32(%esp)
; SANDBOX50: movups %xmm1, 16(%esp)
; SANDBOX50: movups 32(%esp), %xmm0
; SANDBOX50: nop /* variant = 1 */
; SANDBOX50: pshufd $49, 32(%esp), %xmm1
; SANDBOX50: nop /* variant = 4 */
; SANDBOX50: pshufd $49, 16(%esp), %xmm2
; SANDBOX50: nop /* variant = 1 */
; SANDBOX50: pmuludq 16(%esp), %xmm0
; SANDBOX50: pmuludq %xmm2, %xmm1
; SANDBOX50: nop /* variant = 0 */
; SANDBOX50: shufps $136, %xmm1, %xmm0
; SANDBOX50: nop /* variant = 3 */
; SANDBOX50: pshufd $216, %xmm0, %xmm0
; SANDBOX50: nop /* variant = 1 */
; SANDBOX50: movups %xmm0, (%esp)
; SANDBOX50: movups (%esp), %xmm0
; SANDBOX50: addl $60, %esp
; SANDBOX50: pop %ecx
; SANDBOX50: .bundle_lock
; SANDBOX50: andl $-32, %ecx
; SANDBOX50: jmp *%ecx
; SANDBOX50: .bundle_unlock

; Run 5 expectations. Unlike the groups above, these use the NEXT form of the
; prefix, so every output line from the function label onward must match
; consecutively. Two nops are expected after every standalone instruction and
; after every locked bundle, never inside a bundle.
; ARM110P2: mul_v4i32:
; ARM110P2-NEXT: .Lmul_v4i32$entry:
; ARM110P2-NEXT: .bundle_lock
; ARM110P2-NEXT: sub sp, sp, #48
; ARM110P2-NEXT: bic sp, sp, #3221225472
; ARM110P2-NEXT: .bundle_unlock
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: add ip, sp, #32
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: .bundle_lock
; ARM110P2-NEXT: bic ip, ip, #3221225472
; ARM110P2-NEXT: vst1.32 q0, [ip]
; ARM110P2-NEXT: .bundle_unlock
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: # [sp, #32] = def.pseudo
; ARM110P2-NEXT: add ip, sp, #16
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: .bundle_lock
; ARM110P2-NEXT: bic ip, ip, #3221225472
; ARM110P2-NEXT: vst1.32 q1, [ip]
; ARM110P2-NEXT: .bundle_unlock
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: # [sp, #16] = def.pseudo
; ARM110P2-NEXT: add ip, sp, #32
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: .bundle_lock
; ARM110P2-NEXT: bic ip, ip, #3221225472
; ARM110P2-NEXT: vld1.32 q0, [ip]
; ARM110P2-NEXT: .bundle_unlock
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: add ip, sp, #16
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: .bundle_lock
; ARM110P2-NEXT: bic ip, ip, #3221225472
; ARM110P2-NEXT: vld1.32 q1, [ip]
; ARM110P2-NEXT: .bundle_unlock
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: vmul.i32 q0, q0, q1
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: vst1.32 q0, [sp]
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: # [sp] = def.pseudo
; ARM110P2-NEXT: vld1.32 q0, [sp]
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: .bundle_lock
; ARM110P2-NEXT: add sp, sp, #48
; ARM110P2-NEXT: bic sp, sp, #3221225472
; ARM110P2-NEXT: .bundle_unlock
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: .bundle_lock
; ARM110P2-NEXT: bic lr, lr, #3221225487
; ARM110P2-NEXT: bx lr
; ARM110P2-NEXT: .bundle_unlock
; ARM110P2-NEXT: nop
; ARM110P2-NEXT: nop

}