; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64

; Shift left of <2 x i256>: element 0 by 255, element 1 by 2.
define <2 x i256> @test_shl(<2 x i256> %In) {
; X32-LABEL: test_shl:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    shldl $2, %edx, %ecx
; X32-NEXT:    movl %ecx, 60(%eax)
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    shldl $2, %ecx, %edx
; X32-NEXT:    movl %edx, 56(%eax)
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    shldl $2, %edx, %ecx
; X32-NEXT:    movl %ecx, 52(%eax)
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    shldl $2, %ecx, %edx
; X32-NEXT:    movl %edx, 48(%eax)
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    shldl $2, %edx, %ecx
; X32-NEXT:    movl %ecx, 44(%eax)
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    shldl $2, %ecx, %edx
; X32-NEXT:    movl %edx, 40(%eax)
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    shldl $2, %edx, %ecx
; X32-NEXT:    movl %ecx, 36(%eax)
; X32-NEXT:    shll $2, %edx
; X32-NEXT:    movl %edx, 32(%eax)
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    shll $31, %ecx
; X32-NEXT:    movl %ecx, 28(%eax)
; X32-NEXT:    movl $0, 24(%eax)
; X32-NEXT:    movl $0, 20(%eax)
; X32-NEXT:    movl $0, 16(%eax)
; X32-NEXT:    movl $0, 12(%eax)
; X32-NEXT:    movl $0, 8(%eax)
; X32-NEXT:    movl $0, 4(%eax)
; X32-NEXT:    movl $0, (%eax)
; X32-NEXT:    retl $4
;
; X64-LABEL: test_shl:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi
; X64-NEXT:    shldq $2, %rcx, %rdx
; X64-NEXT:    shldq $2, %rdi, %rcx
; X64-NEXT:    shldq $2, %r9, %rdi
; X64-NEXT:    shlq $63, %rsi
; X64-NEXT:    shlq $2, %r9
; X64-NEXT:    movq %rdx, 56(%rax)
; X64-NEXT:    movq %rcx, 48(%rax)
; X64-NEXT:    movq %rdi, 40(%rax)
; X64-NEXT:    movq %r9, 32(%rax)
; X64-NEXT:    movq %rsi, 24(%rax)
; X64-NEXT:    xorps %xmm0, %xmm0
; X64-NEXT:    movaps %xmm0, (%rax)
; X64-NEXT:    movq $0, 16(%rax)
; X64-NEXT:    retq
  %Amt = insertelement <2 x i256> <i256 1, i256 2>, i256 255, i32 0
  %Out = shl <2 x i256> %In, %Amt
  ret <2 x i256> %Out
}

; Logical shift right of <2 x i256>: element 0 by 255, element 1 by 4.
define <2 x i256> @test_srl(<2 x i256> %In) {
; X32-LABEL: test_srl:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    pushl %ebx
; X32-NEXT:    .cfi_def_cfa_offset 12
; X32-NEXT:    pushl %edi
; X32-NEXT:    .cfi_def_cfa_offset 16
; X32-NEXT:    pushl %esi
; X32-NEXT:    .cfi_def_cfa_offset 20
; X32-NEXT:    subl $8, %esp
; X32-NEXT:    .cfi_def_cfa_offset 28
; X32-NEXT:    .cfi_offset %esi, -20
; X32-NEXT:    .cfi_offset %edi, -16
; X32-NEXT:    .cfi_offset %ebx, -12
; X32-NEXT:    .cfi_offset %ebp, -8
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X32-NEXT:    movl %edx, %ecx
; X32-NEXT:    shldl $28, %eax, %ecx
; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT:    shldl $28, %esi, %eax
; X32-NEXT:    movl %eax, (%esp) # 4-byte Spill
; X32-NEXT:    shldl $28, %edi, %esi
; X32-NEXT:    shldl $28, %ebx, %edi
; X32-NEXT:    shldl $28, %ebp, %ebx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    shldl $28, %eax, %ebp
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    shrdl $4, %eax, %ecx
; X32-NEXT:    shrl $4, %edx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl %edx, 60(%eax)
; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT:    movl %edx, 56(%eax)
; X32-NEXT:    movl (%esp), %edx # 4-byte Reload
; X32-NEXT:    movl %edx, 52(%eax)
; X32-NEXT:    movl %esi, 48(%eax)
; X32-NEXT:    movl %edi, 44(%eax)
; X32-NEXT:    movl %ebx, 40(%eax)
; X32-NEXT:    movl %ebp, 36(%eax)
; X32-NEXT:    movl %ecx, 32(%eax)
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    shrl $31, %ecx
; X32-NEXT:    movl %ecx, (%eax)
; X32-NEXT:    movl $0, 28(%eax)
; X32-NEXT:    movl $0, 24(%eax)
; X32-NEXT:    movl $0, 20(%eax)
; X32-NEXT:    movl $0, 16(%eax)
; X32-NEXT:    movl $0, 12(%eax)
; X32-NEXT:    movl $0, 8(%eax)
; X32-NEXT:    movl $0, 4(%eax)
; X32-NEXT:    addl $8, %esp
; X32-NEXT:    .cfi_def_cfa_offset 20
; X32-NEXT:    popl %esi
; X32-NEXT:    .cfi_def_cfa_offset 16
; X32-NEXT:    popl %edi
; X32-NEXT:    .cfi_def_cfa_offset 12
; X32-NEXT:    popl %ebx
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    popl %ebp
; X32-NEXT:    .cfi_def_cfa_offset 4
; X32-NEXT:    retl $4
;
; X64-LABEL: test_srl:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
; X64-NEXT:    shrdq $4, %rsi, %r9
; X64-NEXT:    shrdq $4, %rcx, %rsi
; X64-NEXT:    shrq $63, %r8
; X64-NEXT:    shrdq $4, %rdx, %rcx
; X64-NEXT:    shrq $4, %rdx
; X64-NEXT:    movq %rdx, 56(%rdi)
; X64-NEXT:    movq %rcx, 48(%rdi)
; X64-NEXT:    movq %rsi, 40(%rdi)
; X64-NEXT:    movq %r9, 32(%rdi)
; X64-NEXT:    movq %r8, (%rdi)
; X64-NEXT:    xorps %xmm0, %xmm0
; X64-NEXT:    movaps %xmm0, 16(%rdi)
; X64-NEXT:    movq $0, 8(%rdi)
; X64-NEXT:    retq
  %Amt = insertelement <2 x i256> <i256 3, i256 4>, i256 255, i32 0
  %Out = lshr <2 x i256> %In, %Amt
  ret <2 x i256> %Out
}

; Arithmetic shift right of <2 x i256>: element 0 by 255 (splats the sign bit), element 1 by 6.
define <2 x i256> @test_sra(<2 x i256> %In) {
; X32-LABEL: test_sra:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    pushl %ebx
; X32-NEXT:    .cfi_def_cfa_offset 12
; X32-NEXT:    pushl %edi
; X32-NEXT:    .cfi_def_cfa_offset 16
; X32-NEXT:    pushl %esi
; X32-NEXT:    .cfi_def_cfa_offset 20
; X32-NEXT:    subl $8, %esp
; X32-NEXT:    .cfi_def_cfa_offset 28
; X32-NEXT:    .cfi_offset %esi, -20
; X32-NEXT:    .cfi_offset %edi, -16
; X32-NEXT:    .cfi_offset %ebx, -12
; X32-NEXT:    .cfi_offset %ebp, -8
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X32-NEXT:    movl %edx, %ecx
; X32-NEXT:    shldl $26, %eax, %ecx
; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT:    shldl $26, %esi, %eax
; X32-NEXT:    movl %eax, (%esp) # 4-byte Spill
; X32-NEXT:    shldl $26, %edi, %esi
; X32-NEXT:    shldl $26, %ebx, %edi
; X32-NEXT:    shldl $26, %ebp, %ebx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    shldl $26, %eax, %ebp
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    shrdl $6, %eax, %ecx
; X32-NEXT:    sarl $6, %edx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl %edx, 60(%eax)
; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT:    movl %edx, 56(%eax)
; X32-NEXT:    movl (%esp), %edx # 4-byte Reload
; X32-NEXT:    movl %edx, 52(%eax)
; X32-NEXT:    movl %esi, 48(%eax)
; X32-NEXT:    movl %edi, 44(%eax)
; X32-NEXT:    movl %ebx, 40(%eax)
; X32-NEXT:    movl %ebp, 36(%eax)
; X32-NEXT:    movl %ecx, 32(%eax)
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    sarl $31, %ecx
; X32-NEXT:    movl %ecx, 28(%eax)
; X32-NEXT:    movl %ecx, 24(%eax)
; X32-NEXT:    movl %ecx, 20(%eax)
; X32-NEXT:    movl %ecx, 16(%eax)
; X32-NEXT:    movl %ecx, 12(%eax)
; X32-NEXT:    movl %ecx, 8(%eax)
; X32-NEXT:    movl %ecx, 4(%eax)
; X32-NEXT:    movl %ecx, (%eax)
; X32-NEXT:    addl $8, %esp
; X32-NEXT:    .cfi_def_cfa_offset 20
; X32-NEXT:    popl %esi
; X32-NEXT:    .cfi_def_cfa_offset 16
; X32-NEXT:    popl %edi
; X32-NEXT:    .cfi_def_cfa_offset 12
; X32-NEXT:    popl %ebx
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    popl %ebp
; X32-NEXT:    .cfi_def_cfa_offset 4
; X32-NEXT:    retl $4
;
; X64-LABEL: test_sra:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
; X64-NEXT:    shrdq $6, %rsi, %r9
; X64-NEXT:    shrdq $6, %rcx, %rsi
; X64-NEXT:    sarq $63, %r8
; X64-NEXT:    shrdq $6, %rdx, %rcx
; X64-NEXT:    sarq $6, %rdx
; X64-NEXT:    movq %rdx, 56(%rdi)
; X64-NEXT:    movq %rcx, 48(%rdi)
; X64-NEXT:    movq %rsi, 40(%rdi)
; X64-NEXT:    movq %r9, 32(%rdi)
; X64-NEXT:    movq %r8, 24(%rdi)
; X64-NEXT:    movq %r8, 16(%rdi)
; X64-NEXT:    movq %r8, 8(%rdi)
; X64-NEXT:    movq %r8, (%rdi)
; X64-NEXT:    retq
  %Amt = insertelement <2 x i256> <i256 5, i256 6>, i256 255, i32 0
  %Out = ashr <2 x i256> %In, %Amt
  ret <2 x i256> %Out
}