; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-pc-win32 -mattr=+avx512bw | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+avx512bw | FileCheck %s --check-prefix=CHECK64 --check-prefix=WIN64
; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=+avx512bw | FileCheck %s --check-prefix=CHECK64 --check-prefix=LINUXOSX64

; Overview: every function below is compiled three times with +avx512bw, once
; for the i386-pc-win32 triple, once for x86_64-win32 and once for
; x86_64-linux-gnu. The 32-bit run is checked with the X32 prefix. Both 64-bit
; runs also accept the CHECK64 prefix, which in this file is used only for the
; test_retv* functions; all other 64-bit expectations are written separately
; under the WIN64 and LINUXOSX64 prefixes.
;
; The tests come in groups of four per mask width (v64i1, v32i1, v16i1, v8i1):
; a regcall callee that receives mask arguments, a default-convention caller
; that passes them, a regcall callee that returns a mask, and a
; default-convention caller that consumes the returned mask.
;
; Reviewer remark: the caller_* functions reference attribute group #0, whose
; definition does not appear in the visible text; confirm whether it is defined
; elsewhere in the file or intentionally left undefined.

; Test regcall when receiving arguments of v64i1 type
; The callee takes thirteen <64 x i1> arguments, bitcasts each one to i64 and
; returns the sum of all thirteen values.
define x86_regcallcc i64 @test_argv64i1(<64 x i1> %x0, <64 x i1> %x1, <64 x i1> %x2, <64 x i1> %x3, <64 x i1> %x4, <64 x i1> %x5, <64 x i1> %x6, <64 x i1> %x7, <64 x i1> %x8, <64 x i1> %x9, <64 x i1> %x10, <64 x i1> %x11, <64 x i1> %x12) {
; X32-LABEL: test_argv64i1:
; X32: # %bb.0:
; X32-NEXT: addl %edx, %eax
; X32-NEXT: adcl %edi, %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: addl {{[0-9]+}}(%esp), %eax
; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: retl
;
; WIN64-LABEL: test_argv64i1:
; WIN64: # %bb.0:
; WIN64-NEXT: addq %rcx, %rax
; WIN64-NEXT: addq %rdx, %rax
; WIN64-NEXT: addq %rdi, %rax
; WIN64-NEXT: addq %rsi, %rax
; WIN64-NEXT: addq %r8, %rax
; WIN64-NEXT: addq %r9, %rax
; WIN64-NEXT: addq %r10, %rax
; WIN64-NEXT: addq %r11, %rax
; WIN64-NEXT: addq %r12, %rax
; WIN64-NEXT: addq %r14, %rax
; WIN64-NEXT: addq %r15, %rax
; WIN64-NEXT: addq {{[0-9]+}}(%rsp), %rax
; WIN64-NEXT: retq
;
; LINUXOSX64-LABEL: test_argv64i1:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: addq %rcx, %rax
; LINUXOSX64-NEXT: addq %rdx, %rax
; LINUXOSX64-NEXT: addq %rdi, %rax
; LINUXOSX64-NEXT: addq %rsi, %rax
; LINUXOSX64-NEXT: addq %r8, %rax
; LINUXOSX64-NEXT: addq %r9, %rax
; LINUXOSX64-NEXT: addq %r12, %rax
; LINUXOSX64-NEXT: addq %r13, %rax
; LINUXOSX64-NEXT: addq %r14, %rax
; LINUXOSX64-NEXT: addq %r15, %rax
; LINUXOSX64-NEXT: addq {{[0-9]+}}(%rsp), %rax
; LINUXOSX64-NEXT: addq {{[0-9]+}}(%rsp), %rax
; LINUXOSX64-NEXT: retq
  %y0 = bitcast <64 x i1> %x0 to i64
  %y1 = bitcast <64 x i1> %x1 to i64
  %y2 = bitcast <64 x i1> %x2 to i64
  %y3 = bitcast <64 x i1> %x3 to i64
  %y4 = bitcast <64 x i1> %x4 to i64
  %y5 = bitcast <64 x i1> %x5 to i64
  %y6 = bitcast <64 x i1> %x6 to i64
  %y7 = bitcast <64 x i1> %x7 to i64
  %y8 = bitcast <64 x i1> %x8 to i64
  %y9 = bitcast <64 x i1> %x9 to i64
  %y10 = bitcast <64 x i1> %x10 to i64
  %y11 = bitcast <64 x i1> %x11 to i64
  %y12 = bitcast <64 x i1> %x12 to i64
  %add1 = add i64 %y0, %y1
  %add2 = add i64 %add1, %y2
  %add3 = add i64 %add2, %y3
  %add4 = add i64 %add3, %y4
  %add5 = add i64 %add4, %y5
  %add6 = add i64 %add5, %y6
  %add7 = add i64 %add6, %y7
  %add8 = add i64 %add7, %y8
  %add9 = add i64 %add8, %y9
  %add10 = add i64 %add9, %y10
  %add11 = add i64 %add10, %y11
  %add12 = add i64 %add11, %y12
  ret i64 %add12
}

; Test regcall when passing arguments of v64i1 type
; The caller bitcasts the constant 4294967298 (0x100000002) to <64 x i1>, passes
; that same value as each of the thirteen arguments of @test_argv64i1, and
; returns the callee's i64 result.
define i64 @caller_argv64i1() #0 {
; X32-LABEL: caller_argv64i1:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %edi
; X32-NEXT: subl $88, %esp
; X32-NEXT: vmovaps {{.*#+}} xmm0 = [2,1,2,1]
; X32-NEXT: vmovups %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT: vmovaps {{.*#+}} zmm0 = [2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1]
; X32-NEXT: vmovups %zmm0, (%esp)
; X32-NEXT: movl $1, {{[0-9]+}}(%esp)
; X32-NEXT: movl $2, {{[0-9]+}}(%esp)
; X32-NEXT: movl $2, %eax
; X32-NEXT: movl $1, %ecx
; X32-NEXT: movl $2, %edx
; X32-NEXT: movl $1, %edi
; X32-NEXT: vzeroupper
; X32-NEXT: calll _test_argv64i1
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: addl $88, %esp
; X32-NEXT: popl %edi
; X32-NEXT: retl
;
; WIN64-LABEL: caller_argv64i1:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: pushq %r15
; WIN64-NEXT: .seh_pushreg 15
; WIN64-NEXT: pushq %r14
; WIN64-NEXT: .seh_pushreg 14
; WIN64-NEXT: pushq %r12
; WIN64-NEXT: .seh_pushreg 12
; WIN64-NEXT: pushq %rsi
; WIN64-NEXT: .seh_pushreg 6
; WIN64-NEXT: pushq %rdi
; WIN64-NEXT: .seh_pushreg 7
; WIN64-NEXT: subq $48, %rsp
; WIN64-NEXT: .seh_stackalloc 48
; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 7, 32
; WIN64-NEXT: vmovaps %xmm6, {{[0-9]+}}(%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 6, 16
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: movabsq $4294967298, %rax # imm = 0x100000002
; WIN64-NEXT: movq %rax, (%rsp)
; WIN64-NEXT: movq %rax, %rcx
; WIN64-NEXT: movq %rax, %rdx
; WIN64-NEXT: movq %rax, %rdi
; WIN64-NEXT: movq %rax, %rsi
; WIN64-NEXT: movq %rax, %r8
; WIN64-NEXT: movq %rax, %r9
; WIN64-NEXT: movq %rax, %r10
; WIN64-NEXT: movq %rax, %r11
; WIN64-NEXT: movq %rax, %r12
; WIN64-NEXT: movq %rax, %r14
; WIN64-NEXT: movq %rax, %r15
; WIN64-NEXT: callq test_argv64i1
; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $48, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; WIN64-NEXT: popq %r12
; WIN64-NEXT: popq %r14
; WIN64-NEXT: popq %r15
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: caller_argv64i1:
; LINUXOSX64: # %bb.0: # %entry
; LINUXOSX64-NEXT: pushq %r15
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: pushq %r14
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 24
; LINUXOSX64-NEXT: pushq %r13
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 32
; LINUXOSX64-NEXT: pushq %r12
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 40
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 48
; LINUXOSX64-NEXT: .cfi_offset %r12, -40
; LINUXOSX64-NEXT: .cfi_offset %r13, -32
; LINUXOSX64-NEXT: .cfi_offset %r14, -24
; LINUXOSX64-NEXT: .cfi_offset %r15, -16
; LINUXOSX64-NEXT: movabsq $4294967298, %rax # imm = 0x100000002
; LINUXOSX64-NEXT: movq %rax, %rcx
; LINUXOSX64-NEXT: movq %rax, %rdx
; LINUXOSX64-NEXT: movq %rax, %rdi
; LINUXOSX64-NEXT: movq %rax, %rsi
; LINUXOSX64-NEXT: movq %rax, %r8
; LINUXOSX64-NEXT: movq %rax, %r9
; LINUXOSX64-NEXT: movq %rax, %r12
; LINUXOSX64-NEXT: movq %rax, %r13
; LINUXOSX64-NEXT: movq %rax, %r14
; LINUXOSX64-NEXT: movq %rax, %r15
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_adjust_cfa_offset 8
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_adjust_cfa_offset 8
; LINUXOSX64-NEXT: callq test_argv64i1
; LINUXOSX64-NEXT: addq $24, %rsp
; LINUXOSX64-NEXT: .cfi_adjust_cfa_offset -24
; LINUXOSX64-NEXT: popq %r12
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 32
; LINUXOSX64-NEXT: popq %r13
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 24
; LINUXOSX64-NEXT: popq %r14
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: popq %r15
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
entry:
  %v0 = bitcast i64 4294967298 to <64 x i1>
  %call = call x86_regcallcc i64 @test_argv64i1(<64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
                                                <64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
                                                <64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
                                                <64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
                                                <64 x i1> %v0)
  ret i64 %call
}

; Test regcall when returning v64i1 type
; The callee returns the constant 4294967298 bitcast to <64 x i1>.
define x86_regcallcc <64 x i1> @test_retv64i1() {
; X32-LABEL: test_retv64i1:
; X32: # %bb.0:
; X32-NEXT: movl $2, %eax
; X32-NEXT: movl $1, %ecx
; X32-NEXT: retl
;
; CHECK64-LABEL: test_retv64i1:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movabsq $4294967298, %rax # imm = 0x100000002
; CHECK64-NEXT: retq
  %a = bitcast i64 4294967298 to <64 x i1>
  ret <64 x i1> %a
}

; Test regcall when processing result of v64i1 type
; The caller (default calling convention) invokes @test_retv64i1 and returns
; the <64 x i1> result unchanged.
define <64 x i1> @caller_retv64i1() #0 {
; X32-LABEL: caller_retv64i1:
; X32: # %bb.0: # %entry
; X32-NEXT: calll _test_retv64i1
; X32-NEXT: kmovd %eax, %k0
; X32-NEXT: kmovd %ecx, %k1
; X32-NEXT: kunpckdq %k0, %k1, %k0
; X32-NEXT: vpmovm2b %k0, %zmm0
; X32-NEXT: retl
;
; WIN64-LABEL: caller_retv64i1:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: pushq %rsi
; WIN64-NEXT: .seh_pushreg 6
; WIN64-NEXT: pushq %rdi
; WIN64-NEXT: .seh_pushreg 7
; WIN64-NEXT: subq $40, %rsp
; WIN64-NEXT: .seh_stackalloc 40
; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 7, 16
; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 6, 0
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: callq test_retv64i1
; WIN64-NEXT: kmovq %rax, %k0
; WIN64-NEXT: vpmovm2b %k0, %zmm0
; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $40, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: caller_retv64i1:
; LINUXOSX64: # %bb.0: # %entry
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: callq test_retv64i1
; LINUXOSX64-NEXT: kmovq %rax, %k0
; LINUXOSX64-NEXT: vpmovm2b %k0, %zmm0
; LINUXOSX64-NEXT: popq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
entry:
  %call = call x86_regcallcc <64 x i1> @test_retv64i1()
  ret <64 x i1> %call
}

; Test regcall when receiving arguments of v32i1 type
; The regcall callee forwards its three <32 x i1> arguments to
; @test_argv32i1helper, which is declared without x86_regcallcc, and returns
; the helper's i32 result.
declare i32 @test_argv32i1helper(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2)
define x86_regcallcc i32 @test_argv32i1(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2) {
; X32-LABEL: test_argv32i1:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %esp
; X32-NEXT: subl $72, %esp
; X32-NEXT: vmovups %xmm7, {{[0-9]+}}(%esp) # 16-byte Spill
; X32-NEXT: vmovups %xmm6, {{[0-9]+}}(%esp) # 16-byte Spill
; X32-NEXT: vmovups %xmm5, {{[0-9]+}}(%esp) # 16-byte Spill
; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill
; X32-NEXT: kmovd %edx, %k0
; X32-NEXT: kmovd %ecx, %k1
; X32-NEXT: kmovd %eax, %k2
; X32-NEXT: vpmovm2b %k2, %zmm0
; X32-NEXT: vpmovm2b %k1, %zmm1
; X32-NEXT: vpmovm2b %k0, %zmm2
; X32-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; X32-NEXT: # kill: def $ymm1 killed $ymm1 killed $zmm1
; X32-NEXT: # kill: def $ymm2 killed $ymm2 killed $zmm2
; X32-NEXT: calll _test_argv32i1helper
; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload
; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm5 # 16-byte Reload
; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm6 # 16-byte Reload
; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm7 # 16-byte Reload
; X32-NEXT: addl $72, %esp
; X32-NEXT: popl %esp
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; WIN64-LABEL: test_argv32i1:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: pushq %r11
; WIN64-NEXT: .seh_pushreg 11
; WIN64-NEXT: pushq %r10
; WIN64-NEXT: .seh_pushreg 10
; WIN64-NEXT: pushq %rsp
; WIN64-NEXT: .seh_pushreg 4
; WIN64-NEXT: subq $32, %rsp
; WIN64-NEXT: .seh_stackalloc 32
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: kmovd %edx, %k0
; WIN64-NEXT: kmovd %ecx, %k1
; WIN64-NEXT: kmovd %eax, %k2
; WIN64-NEXT: vpmovm2b %k2, %zmm0
; WIN64-NEXT: vpmovm2b %k1, %zmm1
; WIN64-NEXT: vpmovm2b %k0, %zmm2
; WIN64-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; WIN64-NEXT: # kill: def $ymm1 killed $ymm1 killed $zmm1
; WIN64-NEXT: # kill: def $ymm2 killed $ymm2 killed $zmm2
; WIN64-NEXT: callq test_argv32i1helper
; WIN64-NEXT: nop
; WIN64-NEXT: addq $32, %rsp
; WIN64-NEXT: popq %rsp
; WIN64-NEXT: popq %r10
; WIN64-NEXT: popq %r11
; WIN64-NEXT: vzeroupper
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_argv32i1:
; LINUXOSX64: # %bb.0: # %entry
; LINUXOSX64-NEXT: pushq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: subq $128, %rsp
; LINUXOSX64-NEXT: vmovaps %xmm15, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm14, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm13, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm12, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm11, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm10, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm9, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 144
; LINUXOSX64-NEXT: .cfi_offset %rsp, -16
; LINUXOSX64-NEXT: .cfi_offset %xmm8, -144
; LINUXOSX64-NEXT: .cfi_offset %xmm9, -128
; LINUXOSX64-NEXT: .cfi_offset %xmm10, -112
; LINUXOSX64-NEXT: .cfi_offset %xmm11, -96
; LINUXOSX64-NEXT: .cfi_offset %xmm12, -80
; LINUXOSX64-NEXT: .cfi_offset %xmm13, -64
; LINUXOSX64-NEXT: .cfi_offset %xmm14, -48
; LINUXOSX64-NEXT: .cfi_offset %xmm15, -32
; LINUXOSX64-NEXT: kmovd %edx, %k0
; LINUXOSX64-NEXT: kmovd %ecx, %k1
; LINUXOSX64-NEXT: kmovd %eax, %k2
; LINUXOSX64-NEXT: vpmovm2b %k2, %zmm0
; LINUXOSX64-NEXT: vpmovm2b %k1, %zmm1
; LINUXOSX64-NEXT: vpmovm2b %k0, %zmm2
; LINUXOSX64-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; LINUXOSX64-NEXT: # kill: def $ymm1 killed $ymm1 killed $zmm1
; LINUXOSX64-NEXT: # kill: def $ymm2 killed $ymm2 killed $zmm2
; LINUXOSX64-NEXT: callq test_argv32i1helper
; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm9 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm10 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm11 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm12 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm13 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm14 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm15 # 16-byte Reload
; LINUXOSX64-NEXT: addq $128, %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: popq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: vzeroupper
; LINUXOSX64-NEXT: retq
entry:
  %res = call i32 @test_argv32i1helper(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2)
  ret i32 %res
}

; Test regcall when passing arguments of v32i1 type
; The caller bitcasts i32 1 to <32 x i1>, passes it as all three arguments of
; @test_argv32i1, and returns the callee's i32 result.
define i32 @caller_argv32i1() #0 {
; X32-LABEL: caller_argv32i1:
; X32: # %bb.0: # %entry
; X32-NEXT: movl $1, %eax
; X32-NEXT: movl $1, %ecx
; X32-NEXT: movl $1, %edx
; X32-NEXT: calll _test_argv32i1
; X32-NEXT: retl
;
; WIN64-LABEL: caller_argv32i1:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: pushq %rsi
; WIN64-NEXT: .seh_pushreg 6
; WIN64-NEXT: pushq %rdi
; WIN64-NEXT: .seh_pushreg 7
; WIN64-NEXT: subq $40, %rsp
; WIN64-NEXT: .seh_stackalloc 40
; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 7, 16
; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 6, 0
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: movl $1, %eax
; WIN64-NEXT: movl $1, %ecx
; WIN64-NEXT: movl $1, %edx
; WIN64-NEXT: callq test_argv32i1
; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $40, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: caller_argv32i1:
; LINUXOSX64: # %bb.0: # %entry
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: movl $1, %eax
; LINUXOSX64-NEXT: movl $1, %ecx
; LINUXOSX64-NEXT: movl $1, %edx
; LINUXOSX64-NEXT: callq test_argv32i1
; LINUXOSX64-NEXT: popq %rcx
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
entry:
  %v0 = bitcast i32 1 to <32 x i1>
  %call = call x86_regcallcc i32 @test_argv32i1(<32 x i1> %v0, <32 x i1> %v0, <32 x i1> %v0)
  ret i32 %call
}

; Test regcall when returning v32i1 type
; The callee returns i32 1 bitcast to <32 x i1>.
define x86_regcallcc <32 x i1> @test_retv32i1() {
; X32-LABEL: test_retv32i1:
; X32: # %bb.0:
; X32-NEXT: movl $1, %eax
; X32-NEXT: retl
;
; CHECK64-LABEL: test_retv32i1:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movl $1, %eax
; CHECK64-NEXT: retq
  %a = bitcast i32 1 to <32 x i1>
  ret <32 x i1> %a
}

; Test regcall when processing result of v32i1 type
; The caller invokes @test_retv32i1, bitcasts the <32 x i1> result to i32, adds
; 1 and returns the sum.
define i32 @caller_retv32i1() #0 {
; X32-LABEL: caller_retv32i1:
; X32: # %bb.0: # %entry
; X32-NEXT: calll _test_retv32i1
; X32-NEXT: incl %eax
; X32-NEXT: retl
;
; WIN64-LABEL: caller_retv32i1:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: pushq %rsi
; WIN64-NEXT: .seh_pushreg 6
; WIN64-NEXT: pushq %rdi
; WIN64-NEXT: .seh_pushreg 7
; WIN64-NEXT: subq $40, %rsp
; WIN64-NEXT: .seh_stackalloc 40
; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 7, 16
; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 6, 0
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: callq test_retv32i1
; WIN64-NEXT: incl %eax
; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $40, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: caller_retv32i1:
; LINUXOSX64: # %bb.0: # %entry
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: callq test_retv32i1
; LINUXOSX64-NEXT: incl %eax
; LINUXOSX64-NEXT: popq %rcx
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
entry:
  %call = call x86_regcallcc <32 x i1> @test_retv32i1()
  %c = bitcast <32 x i1> %call to i32
  %add = add i32 %c, 1
  ret i32 %add
}

; Test regcall when receiving arguments of v16i1 type
; The regcall callee forwards its three <16 x i1> arguments to
; @test_argv16i1helper, which is declared without x86_regcallcc, and returns
; the helper's i16 result.
declare i16 @test_argv16i1helper(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2)
define x86_regcallcc i16 @test_argv16i1(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2) {
; X32-LABEL: test_argv16i1:
; X32: # %bb.0:
; X32-NEXT: pushl %esp
; X32-NEXT: subl $72, %esp
; X32-NEXT: vmovups %xmm7, {{[0-9]+}}(%esp) # 16-byte Spill
; X32-NEXT: vmovups %xmm6, {{[0-9]+}}(%esp) # 16-byte Spill
; X32-NEXT: vmovups %xmm5, {{[0-9]+}}(%esp) # 16-byte Spill
; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill
; X32-NEXT: kmovd %edx, %k0
; X32-NEXT: kmovd %ecx, %k1
; X32-NEXT: kmovd %eax, %k2
; X32-NEXT: vpmovm2b %k2, %zmm0
; X32-NEXT: vpmovm2b %k1, %zmm1
; X32-NEXT: vpmovm2b %k0, %zmm2
; X32-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; X32-NEXT: # kill: def $xmm1 killed $xmm1 killed $zmm1
; X32-NEXT: # kill: def $xmm2 killed $xmm2 killed $zmm2
; X32-NEXT: vzeroupper
; X32-NEXT: calll _test_argv16i1helper
; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload
; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm5 # 16-byte Reload
; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm6 # 16-byte Reload
; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm7 # 16-byte Reload
; X32-NEXT: addl $72, %esp
; X32-NEXT: popl %esp
; X32-NEXT: retl
;
; WIN64-LABEL: test_argv16i1:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %r11
; WIN64-NEXT: .seh_pushreg 11
; WIN64-NEXT: pushq %r10
; WIN64-NEXT: .seh_pushreg 10
; WIN64-NEXT: pushq %rsp
; WIN64-NEXT: .seh_pushreg 4
; WIN64-NEXT: subq $32, %rsp
; WIN64-NEXT: .seh_stackalloc 32
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: kmovd %edx, %k0
; WIN64-NEXT: kmovd %ecx, %k1
; WIN64-NEXT: kmovd %eax, %k2
; WIN64-NEXT: vpmovm2b %k2, %zmm0
; WIN64-NEXT: vpmovm2b %k1, %zmm1
; WIN64-NEXT: vpmovm2b %k0, %zmm2
; WIN64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; WIN64-NEXT: # kill: def $xmm1 killed $xmm1 killed $zmm1
; WIN64-NEXT: # kill: def $xmm2 killed $xmm2 killed $zmm2
; WIN64-NEXT: vzeroupper
; WIN64-NEXT: callq test_argv16i1helper
; WIN64-NEXT: nop
; WIN64-NEXT: addq $32, %rsp
; WIN64-NEXT: popq %rsp
; WIN64-NEXT: popq %r10
; WIN64-NEXT: popq %r11
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_argv16i1:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: subq $128, %rsp
; LINUXOSX64-NEXT: vmovaps %xmm15, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm14, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm13, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm12, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm11, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm10, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm9, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 144
; LINUXOSX64-NEXT: .cfi_offset %rsp, -16
; LINUXOSX64-NEXT: .cfi_offset %xmm8, -144
; LINUXOSX64-NEXT: .cfi_offset %xmm9, -128
; LINUXOSX64-NEXT: .cfi_offset %xmm10, -112
; LINUXOSX64-NEXT: .cfi_offset %xmm11, -96
; LINUXOSX64-NEXT: .cfi_offset %xmm12, -80
; LINUXOSX64-NEXT: .cfi_offset %xmm13, -64
; LINUXOSX64-NEXT: .cfi_offset %xmm14, -48
; LINUXOSX64-NEXT: .cfi_offset %xmm15, -32
; LINUXOSX64-NEXT: kmovd %edx, %k0
; LINUXOSX64-NEXT: kmovd %ecx, %k1
; LINUXOSX64-NEXT: kmovd %eax, %k2
; LINUXOSX64-NEXT: vpmovm2b %k2, %zmm0
; LINUXOSX64-NEXT: vpmovm2b %k1, %zmm1
; LINUXOSX64-NEXT: vpmovm2b %k0, %zmm2
; LINUXOSX64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; LINUXOSX64-NEXT: # kill: def $xmm1 killed $xmm1 killed $zmm1
; LINUXOSX64-NEXT: # kill: def $xmm2 killed $xmm2 killed $zmm2
; LINUXOSX64-NEXT: vzeroupper
; LINUXOSX64-NEXT: callq test_argv16i1helper
; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm9 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm10 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm11 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm12 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm13 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm14 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm15 # 16-byte Reload
; LINUXOSX64-NEXT: addq $128, %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: popq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %res = call i16 @test_argv16i1helper(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2)
  ret i16 %res
}

; Test regcall when passing arguments of v16i1 type
; The caller bitcasts i16 1 to <16 x i1>, passes it as all three arguments of
; @test_argv16i1, and returns the callee's i16 result.
define i16 @caller_argv16i1() #0 {
; X32-LABEL: caller_argv16i1:
; X32: # %bb.0: # %entry
; X32-NEXT: movl $1, %eax
; X32-NEXT: movl $1, %ecx
; X32-NEXT: movl $1, %edx
; X32-NEXT: calll _test_argv16i1
; X32-NEXT: retl
;
; WIN64-LABEL: caller_argv16i1:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: pushq %rsi
; WIN64-NEXT: .seh_pushreg 6
; WIN64-NEXT: pushq %rdi
; WIN64-NEXT: .seh_pushreg 7
; WIN64-NEXT: subq $40, %rsp
; WIN64-NEXT: .seh_stackalloc 40
; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 7, 16
; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 6, 0
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: movl $1, %eax
; WIN64-NEXT: movl $1, %ecx
; WIN64-NEXT: movl $1, %edx
; WIN64-NEXT: callq test_argv16i1
; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $40, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: caller_argv16i1:
; LINUXOSX64: # %bb.0: # %entry
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: movl $1, %eax
; LINUXOSX64-NEXT: movl $1, %ecx
; LINUXOSX64-NEXT: movl $1, %edx
; LINUXOSX64-NEXT: callq test_argv16i1
; LINUXOSX64-NEXT: popq %rcx
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
entry:
  %v0 = bitcast i16 1 to <16 x i1>
  %call = call x86_regcallcc i16 @test_argv16i1(<16 x i1> %v0, <16 x i1> %v0, <16 x i1> %v0)
  ret i16 %call
}

; Test regcall when returning v16i1 type
; The callee returns i16 1 bitcast to <16 x i1>.
define x86_regcallcc <16 x i1> @test_retv16i1() {
; X32-LABEL: test_retv16i1:
; X32: # %bb.0:
; X32-NEXT: movw $1, %ax
; X32-NEXT: retl
;
; CHECK64-LABEL: test_retv16i1:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movw $1, %ax
; CHECK64-NEXT: retq
  %a = bitcast i16 1 to <16 x i1>
  ret <16 x i1> %a
}

; Test regcall when processing result of v16i1 type
; The caller invokes @test_retv16i1, bitcasts the <16 x i1> result to i16, adds
; 1 and returns the sum.
define i16 @caller_retv16i1() #0 {
; X32-LABEL: caller_retv16i1:
; X32: # %bb.0: # %entry
; X32-NEXT: calll _test_retv16i1
; X32-NEXT: # kill: def $ax killed $ax def $eax
; X32-NEXT: incl %eax
; X32-NEXT: # kill: def $ax killed $ax killed $eax
; X32-NEXT: retl
;
; WIN64-LABEL: caller_retv16i1:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: pushq %rsi
; WIN64-NEXT: .seh_pushreg 6
; WIN64-NEXT: pushq %rdi
; WIN64-NEXT: .seh_pushreg 7
; WIN64-NEXT: subq $40, %rsp
; WIN64-NEXT: .seh_stackalloc 40
; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 7, 16
; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 6, 0
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: callq test_retv16i1
; WIN64-NEXT: # kill: def $ax killed $ax def $eax
; WIN64-NEXT: incl %eax
; WIN64-NEXT: # kill: def $ax killed $ax killed $eax
; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $40, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: caller_retv16i1:
; LINUXOSX64: # %bb.0: # %entry
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: callq test_retv16i1
; LINUXOSX64-NEXT: # kill: def $ax killed $ax def $eax
; LINUXOSX64-NEXT: incl %eax
; LINUXOSX64-NEXT: # kill: def $ax killed $ax killed $eax
; LINUXOSX64-NEXT: popq %rcx
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
entry:
  %call = call x86_regcallcc <16 x i1> @test_retv16i1()
  %c = bitcast <16 x i1> %call to i16
  %add = add i16 %c, 1
  ret i16 %add
}

; Test regcall when receiving arguments of v8i1 type
; The regcall callee forwards its three <8 x i1> arguments to
; @test_argv8i1helper, which is declared without x86_regcallcc, and returns
; the helper's i8 result.
declare i8 @test_argv8i1helper(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2)
define x86_regcallcc i8 @test_argv8i1(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2) {
; X32-LABEL: test_argv8i1:
; X32: # %bb.0:
; X32-NEXT: pushl %esp
; X32-NEXT: subl $72, %esp
; X32-NEXT: vmovups %xmm7, {{[0-9]+}}(%esp) # 16-byte Spill
; X32-NEXT: vmovups %xmm6, {{[0-9]+}}(%esp) # 16-byte Spill
; X32-NEXT: vmovups %xmm5, {{[0-9]+}}(%esp) # 16-byte Spill
; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill
; X32-NEXT: kmovd %edx, %k0
; X32-NEXT: kmovd %ecx, %k1
; X32-NEXT: kmovd %eax, %k2
; X32-NEXT: vpmovm2w %k2, %zmm0
; X32-NEXT: vpmovm2w %k1, %zmm1
; X32-NEXT: vpmovm2w %k0, %zmm2
; X32-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; X32-NEXT: # kill: def $xmm1 killed $xmm1 killed $zmm1
; X32-NEXT: # kill: def $xmm2 killed $xmm2 killed $zmm2
; X32-NEXT: vzeroupper
; X32-NEXT: calll _test_argv8i1helper
; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload
; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm5 # 16-byte Reload
; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm6 # 16-byte Reload
; X32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm7 # 16-byte Reload
; X32-NEXT: addl $72, %esp
; X32-NEXT: popl %esp
; X32-NEXT: retl
;
; WIN64-LABEL: test_argv8i1:
; WIN64: # %bb.0:
; WIN64-NEXT: pushq %r11
; WIN64-NEXT: .seh_pushreg 11
; WIN64-NEXT: pushq %r10
; WIN64-NEXT: .seh_pushreg 10
; WIN64-NEXT: pushq %rsp
; WIN64-NEXT: .seh_pushreg 4
; WIN64-NEXT: subq $32, %rsp
; WIN64-NEXT: .seh_stackalloc 32
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: kmovd %edx, %k0
; WIN64-NEXT: kmovd %ecx, %k1
; WIN64-NEXT: kmovd %eax, %k2
; WIN64-NEXT: vpmovm2w %k2, %zmm0
; WIN64-NEXT: vpmovm2w %k1, %zmm1
; WIN64-NEXT: vpmovm2w %k0, %zmm2
; WIN64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; WIN64-NEXT: # kill: def $xmm1 killed $xmm1 killed $zmm1
; WIN64-NEXT: # kill: def $xmm2 killed $xmm2 killed $zmm2
; WIN64-NEXT: vzeroupper
; WIN64-NEXT: callq test_argv8i1helper
; WIN64-NEXT: nop
; WIN64-NEXT: addq $32, %rsp
; WIN64-NEXT: popq %rsp
; WIN64-NEXT: popq %r10
; WIN64-NEXT: popq %r11
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: test_argv8i1:
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: pushq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: subq $128, %rsp
; LINUXOSX64-NEXT: vmovaps %xmm15, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm14, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm13, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm12, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm11, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm10, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm9, {{[0-9]+}}(%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: vmovaps %xmm8, (%rsp) # 16-byte Spill
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 144
; LINUXOSX64-NEXT: .cfi_offset %rsp, -16
; LINUXOSX64-NEXT: .cfi_offset %xmm8, -144
; LINUXOSX64-NEXT: .cfi_offset %xmm9, -128
; LINUXOSX64-NEXT: .cfi_offset %xmm10, -112
; LINUXOSX64-NEXT: .cfi_offset %xmm11, -96
; LINUXOSX64-NEXT: .cfi_offset %xmm12, -80
; LINUXOSX64-NEXT: .cfi_offset %xmm13, -64
; LINUXOSX64-NEXT: .cfi_offset %xmm14, -48
; LINUXOSX64-NEXT: .cfi_offset %xmm15, -32
; LINUXOSX64-NEXT: kmovd %edx, %k0
; LINUXOSX64-NEXT: kmovd %ecx, %k1
; LINUXOSX64-NEXT: kmovd %eax, %k2
; LINUXOSX64-NEXT: vpmovm2w %k2, %zmm0
; LINUXOSX64-NEXT: vpmovm2w %k1, %zmm1
; LINUXOSX64-NEXT: vpmovm2w %k0, %zmm2
; LINUXOSX64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; LINUXOSX64-NEXT: # kill: def $xmm1 killed $xmm1 killed $zmm1
; LINUXOSX64-NEXT: # kill: def $xmm2 killed $xmm2 killed $zmm2
; LINUXOSX64-NEXT: vzeroupper
; LINUXOSX64-NEXT: callq test_argv8i1helper
; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm9 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm10 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm11 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm12 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm13 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm14 # 16-byte Reload
; LINUXOSX64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm15 # 16-byte Reload
; LINUXOSX64-NEXT: addq $128, %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: popq %rsp
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
  %res = call i8 @test_argv8i1helper(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2)
  ret i8 %res
}

; Test regcall when passing arguments of v8i1 type
; The caller bitcasts i8 1 to <8 x i1>, passes it as all three arguments of
; @test_argv8i1, and returns the callee's i8 result.
define i8 @caller_argv8i1() #0 {
; X32-LABEL: caller_argv8i1:
; X32: # %bb.0: # %entry
; X32-NEXT: movl $1, %eax
; X32-NEXT: movl $1, %ecx
; X32-NEXT: movl $1, %edx
; X32-NEXT: calll _test_argv8i1
; X32-NEXT: retl
;
; WIN64-LABEL: caller_argv8i1:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: pushq %rsi
; WIN64-NEXT: .seh_pushreg 6
; WIN64-NEXT: pushq %rdi
; WIN64-NEXT: .seh_pushreg 7
; WIN64-NEXT: subq $40, %rsp
; WIN64-NEXT: .seh_stackalloc 40
; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 7, 16
; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 6, 0
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: movl $1, %eax
; WIN64-NEXT: movl $1, %ecx
; WIN64-NEXT: movl $1, %edx
; WIN64-NEXT: callq test_argv8i1
; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $40, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: caller_argv8i1:
; LINUXOSX64: # %bb.0: # %entry
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: movl $1, %eax
; LINUXOSX64-NEXT: movl $1, %ecx
; LINUXOSX64-NEXT: movl $1, %edx
; LINUXOSX64-NEXT: callq test_argv8i1
; LINUXOSX64-NEXT: popq %rcx
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: retq
entry:
  %v0 = bitcast i8 1 to <8 x i1>
  %call = call x86_regcallcc i8 @test_argv8i1(<8 x i1> %v0, <8 x i1> %v0, <8 x i1> %v0)
  ret i8 %call
}

; Test regcall when returning v8i1 type
; The callee returns i8 1 bitcast to <8 x i1>.
define x86_regcallcc <8 x i1> @test_retv8i1() {
; X32-LABEL: test_retv8i1:
; X32: # %bb.0:
; X32-NEXT: movb $1, %al
; X32-NEXT: retl
;
; CHECK64-LABEL: test_retv8i1:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movb $1, %al
; CHECK64-NEXT: retq
  %a = bitcast i8 1 to <8 x i1>
  ret <8 x i1> %a
}

; Test regcall when processing result of v8i1 type
; The caller (default calling convention) invokes @test_retv8i1 and returns the
; <8 x i1> result unchanged.
define <8 x i1> @caller_retv8i1() #0 {
; X32-LABEL: caller_retv8i1:
; X32: # %bb.0: # %entry
; X32-NEXT: calll _test_retv8i1
; X32-NEXT: # kill: def $al killed $al def $eax
; X32-NEXT: kmovd %eax, %k0
; X32-NEXT: vpmovm2w %k0, %zmm0
; X32-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; WIN64-LABEL: caller_retv8i1:
; WIN64: # %bb.0: # %entry
; WIN64-NEXT: pushq %rsi
; WIN64-NEXT: .seh_pushreg 6
; WIN64-NEXT: pushq %rdi
; WIN64-NEXT: .seh_pushreg 7
; WIN64-NEXT: subq $40, %rsp
; WIN64-NEXT: .seh_stackalloc 40
; WIN64-NEXT: vmovaps %xmm7, {{[0-9]+}}(%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 7, 16
; WIN64-NEXT: vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT: .seh_savexmm 6, 0
; WIN64-NEXT: .seh_endprologue
; WIN64-NEXT: callq test_retv8i1
; WIN64-NEXT: # kill: def $al killed $al def $eax
; WIN64-NEXT: kmovd %eax, %k0
; WIN64-NEXT: vpmovm2w %k0, %zmm0
; WIN64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; WIN64-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm7 # 16-byte Reload
; WIN64-NEXT: addq $40, %rsp
; WIN64-NEXT: popq %rdi
; WIN64-NEXT: popq %rsi
; WIN64-NEXT: vzeroupper
; WIN64-NEXT: retq
; WIN64-NEXT: .seh_handlerdata
; WIN64-NEXT: .text
; WIN64-NEXT: .seh_endproc
;
; LINUXOSX64-LABEL: caller_retv8i1:
; LINUXOSX64: # %bb.0: # %entry
; LINUXOSX64-NEXT: pushq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT: callq test_retv8i1
; LINUXOSX64-NEXT: # kill: def $al killed $al def $eax
; LINUXOSX64-NEXT: kmovd %eax, %k0
; LINUXOSX64-NEXT: vpmovm2w %k0, %zmm0
; LINUXOSX64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; LINUXOSX64-NEXT: popq %rax
; LINUXOSX64-NEXT: .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT: vzeroupper
; LINUXOSX64-NEXT: retq
entry:
  %call = call x86_regcallcc <8 x i1> @test_retv8i1()
  ret <8 x i1> %call
}