; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sha | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=goldmont | FileCheck %s --check-prefix=CHECK --check-prefix=GOLDMONT
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=cannonlake | FileCheck %s --check-prefix=CHECK --check-prefix=CANNONLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1

; Each function below calls one SHA intrinsic twice: first with two register
; operands, then with the second operand loaded from memory. The expected
; assembly therefore contains a register form and a memory form of the same
; instruction, each followed by the "# sched: [...]" annotation that llc prints
; under -print-schedule for the CPU selected on the corresponding run line.
; The assertion lines are tool-generated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

;
; SHA1
;

; sha1msg1: register form followed by memory form.
define <4 x i32> @test_sha1msg1(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; GENERIC-LABEL: test_sha1msg1:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    sha1msg1 %xmm1, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT:    sha1msg1 (%rdi), %xmm0 # sched: [11:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; GOLDMONT-LABEL: test_sha1msg1:
; GOLDMONT:       # %bb.0:
; GOLDMONT-NEXT:    sha1msg1 %xmm1, %xmm0 # sched: [4:1.00]
; GOLDMONT-NEXT:    sha1msg1 (%rdi), %xmm0 # sched: [7:1.00]
; GOLDMONT-NEXT:    retq # sched: [4:1.00]
;
; CANNONLAKE-LABEL: test_sha1msg1:
; CANNONLAKE:       # %bb.0:
; CANNONLAKE-NEXT:    sha1msg1 %xmm1, %xmm0 # sched: [4:0.50]
; CANNONLAKE-NEXT:    sha1msg1 (%rdi), %xmm0 # sched: [10:0.50]
; CANNONLAKE-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_sha1msg1:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    sha1msg1 %xmm1, %xmm0 # sched: [2:1.00]
; ZNVER1-NEXT:    sha1msg1 (%rdi), %xmm0 # sched: [9:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = load <4 x i32>, <4 x i32>* %a2
  %2 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a0, <4 x i32> %a1)
  %3 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %2, <4 x i32> %1)
  ret <4 x i32> %3
}
declare <4 x i32> @llvm.x86.sha1msg1(<4 x i32>, <4 x i32>)

; sha1msg2: register form followed by memory form.
define <4 x i32> @test_sha1msg2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; GENERIC-LABEL: test_sha1msg2:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    sha1msg2 %xmm1, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT:    sha1msg2 (%rdi), %xmm0 # sched: [11:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; GOLDMONT-LABEL: test_sha1msg2:
; GOLDMONT:       # %bb.0:
; GOLDMONT-NEXT:    sha1msg2 %xmm1, %xmm0 # sched: [4:1.00]
; GOLDMONT-NEXT:    sha1msg2 (%rdi), %xmm0 # sched: [7:1.00]
; GOLDMONT-NEXT:    retq # sched: [4:1.00]
;
; CANNONLAKE-LABEL: test_sha1msg2:
; CANNONLAKE:       # %bb.0:
; CANNONLAKE-NEXT:    sha1msg2 %xmm1, %xmm0 # sched: [4:0.50]
; CANNONLAKE-NEXT:    sha1msg2 (%rdi), %xmm0 # sched: [10:0.50]
; CANNONLAKE-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_sha1msg2:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    sha1msg2 %xmm1, %xmm0 # sched: [1:0.50]
; ZNVER1-NEXT:    sha1msg2 (%rdi), %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = load <4 x i32>, <4 x i32>* %a2
  %2 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a0, <4 x i32> %a1)
  %3 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %2, <4 x i32> %1)
  ret <4 x i32> %3
}
declare <4 x i32> @llvm.x86.sha1msg2(<4 x i32>, <4 x i32>)

; sha1nexte: register form followed by memory form.
define <4 x i32> @test_sha1nexte(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; GENERIC-LABEL: test_sha1nexte:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    sha1nexte %xmm1, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT:    sha1nexte (%rdi), %xmm0 # sched: [11:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; GOLDMONT-LABEL: test_sha1nexte:
; GOLDMONT:       # %bb.0:
; GOLDMONT-NEXT:    sha1nexte %xmm1, %xmm0 # sched: [4:1.00]
; GOLDMONT-NEXT:    sha1nexte (%rdi), %xmm0 # sched: [7:1.00]
; GOLDMONT-NEXT:    retq # sched: [4:1.00]
;
; CANNONLAKE-LABEL: test_sha1nexte:
; CANNONLAKE:       # %bb.0:
; CANNONLAKE-NEXT:    sha1nexte %xmm1, %xmm0 # sched: [4:0.50]
; CANNONLAKE-NEXT:    sha1nexte (%rdi), %xmm0 # sched: [10:0.50]
; CANNONLAKE-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_sha1nexte:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    sha1nexte %xmm1, %xmm0 # sched: [1:1.00]
; ZNVER1-NEXT:    sha1nexte (%rdi), %xmm0 # sched: [8:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = load <4 x i32>, <4 x i32>* %a2
  %2 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a0, <4 x i32> %a1)
  %3 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %2, <4 x i32> %1)
  ret <4 x i32> %3
}
declare <4 x i32> @llvm.x86.sha1nexte(<4 x i32>, <4 x i32>)

; sha1rnds4: both calls pass the constant i8 3, which appears as the $3
; immediate in the expected assembly.
define <4 x i32> @test_sha1rnds4(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; GENERIC-LABEL: test_sha1rnds4:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    sha1rnds4 $3, %xmm1, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # sched: [11:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; GOLDMONT-LABEL: test_sha1rnds4:
; GOLDMONT:       # %bb.0:
; GOLDMONT-NEXT:    sha1rnds4 $3, %xmm1, %xmm0 # sched: [4:1.00]
; GOLDMONT-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # sched: [7:1.00]
; GOLDMONT-NEXT:    retq # sched: [4:1.00]
;
; CANNONLAKE-LABEL: test_sha1rnds4:
; CANNONLAKE:       # %bb.0:
; CANNONLAKE-NEXT:    sha1rnds4 $3, %xmm1, %xmm0 # sched: [4:0.50]
; CANNONLAKE-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # sched: [10:0.50]
; CANNONLAKE-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_sha1rnds4:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    sha1rnds4 $3, %xmm1, %xmm0 # sched: [6:1.00]
; ZNVER1-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # sched: [13:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = load <4 x i32>, <4 x i32>* %a2
  %2 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a0, <4 x i32> %a1, i8 3)
  %3 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %2, <4 x i32> %1, i8 3)
  ret <4 x i32> %3
}
declare <4 x i32> @llvm.x86.sha1rnds4(<4 x i32>, <4 x i32>, i8)

;
; SHA256
;

; sha256msg1: register form followed by memory form.
define <4 x i32> @test_sha256msg1(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; GENERIC-LABEL: test_sha256msg1:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    sha256msg1 %xmm1, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT:    sha256msg1 (%rdi), %xmm0 # sched: [11:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; GOLDMONT-LABEL: test_sha256msg1:
; GOLDMONT:       # %bb.0:
; GOLDMONT-NEXT:    sha256msg1 %xmm1, %xmm0 # sched: [4:1.00]
; GOLDMONT-NEXT:    sha256msg1 (%rdi), %xmm0 # sched: [7:1.00]
; GOLDMONT-NEXT:    retq # sched: [4:1.00]
;
; CANNONLAKE-LABEL: test_sha256msg1:
; CANNONLAKE:       # %bb.0:
; CANNONLAKE-NEXT:    sha256msg1 %xmm1, %xmm0 # sched: [4:0.50]
; CANNONLAKE-NEXT:    sha256msg1 (%rdi), %xmm0 # sched: [10:0.50]
; CANNONLAKE-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_sha256msg1:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    sha256msg1 %xmm1, %xmm0 # sched: [2:1.00]
; ZNVER1-NEXT:    sha256msg1 (%rdi), %xmm0 # sched: [9:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = load <4 x i32>, <4 x i32>* %a2
  %2 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a0, <4 x i32> %a1)
  %3 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %2, <4 x i32> %1)
  ret <4 x i32> %3
}
declare <4 x i32> @llvm.x86.sha256msg1(<4 x i32>, <4 x i32>)

; sha256msg2: register form followed by memory form.
define <4 x i32> @test_sha256msg2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; GENERIC-LABEL: test_sha256msg2:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    sha256msg2 %xmm1, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT:    sha256msg2 (%rdi), %xmm0 # sched: [11:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; GOLDMONT-LABEL: test_sha256msg2:
; GOLDMONT:       # %bb.0:
; GOLDMONT-NEXT:    sha256msg2 %xmm1, %xmm0 # sched: [4:1.00]
; GOLDMONT-NEXT:    sha256msg2 (%rdi), %xmm0 # sched: [7:1.00]
; GOLDMONT-NEXT:    retq # sched: [4:1.00]
;
; CANNONLAKE-LABEL: test_sha256msg2:
; CANNONLAKE:       # %bb.0:
; CANNONLAKE-NEXT:    sha256msg2 %xmm1, %xmm0 # sched: [4:0.50]
; CANNONLAKE-NEXT:    sha256msg2 (%rdi), %xmm0 # sched: [10:0.50]
; CANNONLAKE-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_sha256msg2:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    sha256msg2 %xmm1, %xmm0 # sched: [100:0.25]
; ZNVER1-NEXT:    sha256msg2 (%rdi), %xmm0 # sched: [100:0.25]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = load <4 x i32>, <4 x i32>* %a2
  %2 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a0, <4 x i32> %a1)
  %3 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %2, <4 x i32> %1)
  ret <4 x i32> %3
}
declare <4 x i32> @llvm.x86.sha256msg2(<4 x i32>, <4 x i32>)

; sha256rnds2: takes a third vector operand (%a2). The expected assembly copies
; it into %xmm0 before the two sha256rnds2 instructions and copies the result
; back into %xmm0 before returning, hence the extra register moves.
define <4 x i32> @test_sha256rnds2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> *%a3) {
; GENERIC-LABEL: test_sha256rnds2:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    movaps %xmm0, %xmm3 # sched: [1:1.00]
; GENERIC-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [5:1.00]
; GENERIC-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [11:1.00]
; GENERIC-NEXT:    movaps %xmm3, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; GOLDMONT-LABEL: test_sha256rnds2:
; GOLDMONT:       # %bb.0:
; GOLDMONT-NEXT:    movaps %xmm0, %xmm3 # sched: [1:0.50]
; GOLDMONT-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.50]
; GOLDMONT-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [4:1.00]
; GOLDMONT-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [7:1.00]
; GOLDMONT-NEXT:    movaps %xmm3, %xmm0 # sched: [1:0.50]
; GOLDMONT-NEXT:    retq # sched: [4:1.00]
;
; CANNONLAKE-LABEL: test_sha256rnds2:
; CANNONLAKE:       # %bb.0:
; CANNONLAKE-NEXT:    vmovaps %xmm0, %xmm3 # sched: [1:0.33]
; CANNONLAKE-NEXT:    vmovaps %xmm2, %xmm0 # sched: [1:0.33]
; CANNONLAKE-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [4:0.50]
; CANNONLAKE-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [10:0.50]
; CANNONLAKE-NEXT:    vmovaps %xmm3, %xmm0 # sched: [1:0.33]
; CANNONLAKE-NEXT:    retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_sha256rnds2:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vmovaps %xmm0, %xmm3 # sched: [1:0.25]
; ZNVER1-NEXT:    vmovaps %xmm2, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [4:1.00]
; ZNVER1-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [11:1.00]
; ZNVER1-NEXT:    vmovaps %xmm3, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = load <4 x i32>, <4 x i32>* %a3
  %2 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2)
  %3 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %2, <4 x i32> %1, <4 x i32> %a2)
  ret <4 x i32> %3
}
declare <4 x i32> @llvm.x86.sha256rnds2(<4 x i32>, <4 x i32>, <4 x i32>)