# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -timeline -timeline-max-iterations=3 < %s | FileCheck %s

# Purpose: exercises 256-bit AVX instructions whose two source operands are
# the same register (the "zero-idiom" cases named by the regions below) on the
# btver2 model. Each named region is simulated on its own, and the expected
# report for it (summary, instruction info, resource pressure, timeline and
# wait times) follows in the generated assertions, in region order.
#
# The instructions and the generated assertions must stay in sync; after any
# change to the instructions, regenerate the assertions with the script named
# on the first line instead of editing them by hand.

# TODO: Fix the processor resource usage for zero-idiom YMM XOR instructions.
# Those vector XOR instructions should only consume 1cy of JFPU1 (instead
# of 2cy).
# NOTE(review): the resource-pressure rows recorded below for vxorps and
# vxorpd already list 1.00 in the JFPU1 column, so the TODO above may be
# stale -- confirm against the scheduling model before removing it.

# Region 1: vxorps of %ymm1 with itself sits between the vaddps that writes
# %ymm1 and the vblendps that reads it. In the recorded timeline the vxorps
# completes before the preceding vaddps does, i.e. it does not wait for the
# vaddps result.
# LLVM-MCA-BEGIN ZERO-IDIOM-1

vaddps %ymm0, %ymm0, %ymm1
vxorps %ymm1, %ymm1, %ymm1
vblendps $2, %ymm1, %ymm2, %ymm3

# LLVM-MCA-END

# Region 2: same shape as region 1, using the packed-double forms
# (vaddpd / vxorpd / vblendpd).
# LLVM-MCA-BEGIN ZERO-IDIOM-2

vaddpd %ymm0, %ymm0, %ymm1
vxorpd %ymm1, %ymm1, %ymm1
vblendpd $2, %ymm1, %ymm2, %ymm3

# LLVM-MCA-END

# Region 3: vandnps with both sources equal to %ymm2, directly after the
# vaddps that produces %ymm2.
# LLVM-MCA-BEGIN ZERO-IDIOM-3
vaddps %ymm0, %ymm1, %ymm2
vandnps %ymm2, %ymm2, %ymm3
# LLVM-MCA-END

# NOTE(review): region 4 repeats region 3 instruction-for-instruction, and its
# recorded expectations are identical as well. Presumably one of the two was
# meant to cover the packed-double forms (vaddpd / vandnpd), mirroring how
# region 2 pairs with region 1 -- confirm, and regenerate the assertions if
# the instructions are changed.
# LLVM-MCA-BEGIN ZERO-IDIOM-4
vaddps %ymm0, %ymm1, %ymm2
vandnps %ymm2, %ymm2, %ymm3
# LLVM-MCA-END

# Region 5: vperm2f128 with both sources equal to %ymm0 and immediate $136
# (0x88, i.e. imm8 bits 3 and 7 set -- per the ISA reference these are the
# per-half zeroing bits; verify against the instruction documentation). The
# following vaddps writes %ymm0 back, so without special handling each
# iteration's vperm2f128 would depend on the previous iteration's vaddps.
# LLVM-MCA-BEGIN ZERO-IDIOM-5
vperm2f128 $136, %ymm0, %ymm0, %ymm1
vaddps %ymm1, %ymm1, %ymm0
# LLVM-MCA-END

# CHECK: [0] Code Region - ZERO-IDIOM-1

# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 300
# CHECK-NEXT: Total Cycles: 304
# CHECK-NEXT: Total uOps: 600

# CHECK: Dispatch Width: 2
# CHECK-NEXT: uOps Per Cycle: 1.97
# CHECK-NEXT: IPC: 0.99
# CHECK-NEXT: Block RThroughput: 3.0

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 2 3 2.00 vaddps %ymm0, %ymm0, %ymm1
# CHECK-NEXT: 2 1 0.50 vxorps %ymm1, %ymm1, %ymm1
# CHECK-NEXT: 2 1 1.00 vblendps $2, %ymm1, %ymm2, %ymm3

# CHECK: Resources:
# CHECK-NEXT: [0] - JALU0
# CHECK-NEXT: [1] - JALU1
# CHECK-NEXT: [2] - JDiv
# CHECK-NEXT: [3] - JFPA
# CHECK-NEXT: [4] - JFPM
# CHECK-NEXT: [5] - JFPU0
# CHECK-NEXT: [6] - JFPU1
# CHECK-NEXT: [7] - JLAGU
# CHECK-NEXT: [8] - JMul
# CHECK-NEXT: [9] - JSAGU
# CHECK-NEXT: [10] - JSTC
# CHECK-NEXT: [11] - JVALU0
# CHECK-NEXT: [12] - JVALU1
# CHECK-NEXT: [13] - JVIMUL

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: - - - 3.00 2.00 3.00 2.00 - - - - - - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vaddps %ymm0, %ymm0, %ymm1
# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - vxorps %ymm1, %ymm1, %ymm1
# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vblendps $2, %ymm1, %ymm2, %ymm3

# CHECK: Timeline view:
# CHECK-NEXT: 0123
# CHECK-NEXT: Index 0123456789

# CHECK: [0,0] DeeeER . . vaddps %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [0,1] .DeE-R . . vxorps %ymm1, %ymm1, %ymm1
# CHECK-NEXT: [0,2] . DeE-R . . vblendps $2, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: [1,0] . DeeeER . . vaddps %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [1,1] . DeE-R . . vxorps %ymm1, %ymm1, %ymm1
# CHECK-NEXT: [1,2] . DeE-R. . vblendps $2, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: [2,0] . .D=eeeER. vaddps %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [2,1] . . DeE--R. vxorps %ymm1, %ymm1, %ymm1
# CHECK-NEXT: [2,2] . . DeE--R vblendps $2, %ymm1, %ymm2, %ymm3

# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 1.3 1.3 0.0 vaddps %ymm0, %ymm0, %ymm1
# CHECK-NEXT: 1. 3 1.0 1.0 1.3 vxorps %ymm1, %ymm1, %ymm1
# CHECK-NEXT: 2. 3 1.0 0.0 1.3 vblendps $2, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 3 1.1 0.8 0.9 <total>

# CHECK: [1] Code Region - ZERO-IDIOM-2

# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 300
# CHECK-NEXT: Total Cycles: 304
# CHECK-NEXT: Total uOps: 600

# CHECK: Dispatch Width: 2
# CHECK-NEXT: uOps Per Cycle: 1.97
# CHECK-NEXT: IPC: 0.99
# CHECK-NEXT: Block RThroughput: 3.0

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 2 3 2.00 vaddpd %ymm0, %ymm0, %ymm1
# CHECK-NEXT: 2 1 0.50 vxorpd %ymm1, %ymm1, %ymm1
# CHECK-NEXT: 2 1 1.00 vblendpd $2, %ymm1, %ymm2, %ymm3

# CHECK: Resources:
# CHECK-NEXT: [0] - JALU0
# CHECK-NEXT: [1] - JALU1
# CHECK-NEXT: [2] - JDiv
# CHECK-NEXT: [3] - JFPA
# CHECK-NEXT: [4] - JFPM
# CHECK-NEXT: [5] - JFPU0
# CHECK-NEXT: [6] - JFPU1
# CHECK-NEXT: [7] - JLAGU
# CHECK-NEXT: [8] - JMul
# CHECK-NEXT: [9] - JSAGU
# CHECK-NEXT: [10] - JSTC
# CHECK-NEXT: [11] - JVALU0
# CHECK-NEXT: [12] - JVALU1
# CHECK-NEXT: [13] - JVIMUL

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: - - - 3.00 2.00 3.00 2.00 - - - - - - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vaddpd %ymm0, %ymm0, %ymm1
# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - vxorpd %ymm1, %ymm1, %ymm1
# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vblendpd $2, %ymm1, %ymm2, %ymm3

# CHECK: Timeline view:
# CHECK-NEXT: 0123
# CHECK-NEXT: Index 0123456789

# CHECK: [0,0] DeeeER . . vaddpd %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [0,1] .DeE-R . . vxorpd %ymm1, %ymm1, %ymm1
# CHECK-NEXT: [0,2] . DeE-R . . vblendpd $2, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: [1,0] . DeeeER . . vaddpd %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [1,1] . DeE-R . . vxorpd %ymm1, %ymm1, %ymm1
# CHECK-NEXT: [1,2] . DeE-R. . vblendpd $2, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: [2,0] . .D=eeeER. vaddpd %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [2,1] . . DeE--R. vxorpd %ymm1, %ymm1, %ymm1
# CHECK-NEXT: [2,2] . . DeE--R vblendpd $2, %ymm1, %ymm2, %ymm3

# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 1.3 1.3 0.0 vaddpd %ymm0, %ymm0, %ymm1
# CHECK-NEXT: 1. 3 1.0 1.0 1.3 vxorpd %ymm1, %ymm1, %ymm1
# CHECK-NEXT: 2. 3 1.0 0.0 1.3 vblendpd $2, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 3 1.1 0.8 0.9 <total>

# CHECK: [2] Code Region - ZERO-IDIOM-3

# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 200
# CHECK-NEXT: Total Cycles: 204
# CHECK-NEXT: Total uOps: 400

# CHECK: Dispatch Width: 2
# CHECK-NEXT: uOps Per Cycle: 1.96
# CHECK-NEXT: IPC: 0.98
# CHECK-NEXT: Block RThroughput: 2.0

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 2 3 2.00 vaddps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 1 0.50 vandnps %ymm2, %ymm2, %ymm3

# CHECK: Resources:
# CHECK-NEXT: [0] - JALU0
# CHECK-NEXT: [1] - JALU1
# CHECK-NEXT: [2] - JDiv
# CHECK-NEXT: [3] - JFPA
# CHECK-NEXT: [4] - JFPM
# CHECK-NEXT: [5] - JFPU0
# CHECK-NEXT: [6] - JFPU1
# CHECK-NEXT: [7] - JLAGU
# CHECK-NEXT: [8] - JMul
# CHECK-NEXT: [9] - JSAGU
# CHECK-NEXT: [10] - JSTC
# CHECK-NEXT: [11] - JVALU0
# CHECK-NEXT: [12] - JVALU1
# CHECK-NEXT: [13] - JVIMUL

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: - - - 2.00 1.00 2.00 1.00 - - - - - - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vaddps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - vandnps %ymm2, %ymm2, %ymm3

# CHECK: Timeline view:
# CHECK-NEXT: Index 0123456789

# CHECK: [0,0] DeeeER . vaddps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: [0,1] .DeE-R . vandnps %ymm2, %ymm2, %ymm3
# CHECK-NEXT: [1,0] . DeeeER . vaddps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: [1,1] . DeE-R . vandnps %ymm2, %ymm2, %ymm3
# CHECK-NEXT: [2,0] . DeeeER vaddps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: [2,1] . DeE-R vandnps %ymm2, %ymm2, %ymm3

# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 1.0 1.0 0.0 vaddps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1. 3 1.0 1.0 1.0 vandnps %ymm2, %ymm2, %ymm3
# CHECK-NEXT: 3 1.0 1.0 0.5 <total>

# CHECK: [3] Code Region - ZERO-IDIOM-4

# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 200
# CHECK-NEXT: Total Cycles: 204
# CHECK-NEXT: Total uOps: 400

# CHECK: Dispatch Width: 2
# CHECK-NEXT: uOps Per Cycle: 1.96
# CHECK-NEXT: IPC: 0.98
# CHECK-NEXT: Block RThroughput: 2.0

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 2 3 2.00 vaddps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 1 0.50 vandnps %ymm2, %ymm2, %ymm3

# CHECK: Resources:
# CHECK-NEXT: [0] - JALU0
# CHECK-NEXT: [1] - JALU1
# CHECK-NEXT: [2] - JDiv
# CHECK-NEXT: [3] - JFPA
# CHECK-NEXT: [4] - JFPM
# CHECK-NEXT: [5] - JFPU0
# CHECK-NEXT: [6] - JFPU1
# CHECK-NEXT: [7] - JLAGU
# CHECK-NEXT: [8] - JMul
# CHECK-NEXT: [9] - JSAGU
# CHECK-NEXT: [10] - JSTC
# CHECK-NEXT: [11] - JVALU0
# CHECK-NEXT: [12] - JVALU1
# CHECK-NEXT: [13] - JVIMUL

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: - - - 2.00 1.00 2.00 1.00 - - - - - - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vaddps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - vandnps %ymm2, %ymm2, %ymm3

# CHECK: Timeline view:
# CHECK-NEXT: Index 0123456789

# CHECK: [0,0] DeeeER . vaddps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: [0,1] .DeE-R . vandnps %ymm2, %ymm2, %ymm3
# CHECK-NEXT: [1,0] . DeeeER . vaddps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: [1,1] . DeE-R . vandnps %ymm2, %ymm2, %ymm3
# CHECK-NEXT: [2,0] . DeeeER vaddps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: [2,1] . DeE-R vandnps %ymm2, %ymm2, %ymm3

# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 1.0 1.0 0.0 vaddps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1. 3 1.0 1.0 1.0 vandnps %ymm2, %ymm2, %ymm3
# CHECK-NEXT: 3 1.0 1.0 0.5 <total>

# CHECK: [4] Code Region - ZERO-IDIOM-5

# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 200
# CHECK-NEXT: Total Cycles: 205
# CHECK-NEXT: Total uOps: 400

# CHECK: Dispatch Width: 2
# CHECK-NEXT: uOps Per Cycle: 1.95
# CHECK-NEXT: IPC: 0.98
# CHECK-NEXT: Block RThroughput: 2.0

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 2 1 0.50 vperm2f128 $136, %ymm0, %ymm0, %ymm1
# CHECK-NEXT: 2 3 2.00 vaddps %ymm1, %ymm1, %ymm0

# CHECK: Resources:
# CHECK-NEXT: [0] - JALU0
# CHECK-NEXT: [1] - JALU1
# CHECK-NEXT: [2] - JDiv
# CHECK-NEXT: [3] - JFPA
# CHECK-NEXT: [4] - JFPM
# CHECK-NEXT: [5] - JFPU0
# CHECK-NEXT: [6] - JFPU1
# CHECK-NEXT: [7] - JLAGU
# CHECK-NEXT: [8] - JMul
# CHECK-NEXT: [9] - JSAGU
# CHECK-NEXT: [10] - JSTC
# CHECK-NEXT: [11] - JVALU0
# CHECK-NEXT: [12] - JVALU1
# CHECK-NEXT: [13] - JVIMUL

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# CHECK-NEXT: - - - 2.00 1.00 2.00 1.00 - - - - - - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# CHECK-NEXT: - - - - 1.00 - 1.00 - - - - - - - vperm2f128 $136, %ymm0, %ymm0, %ymm1
# CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vaddps %ymm1, %ymm1, %ymm0

# CHECK: Timeline view:
# CHECK-NEXT: 0
# CHECK-NEXT: Index 0123456789

# CHECK: [0,0] DeER . . vperm2f128 $136, %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [0,1] .DeeeER . vaddps %ymm1, %ymm1, %ymm0
# CHECK-NEXT: [1,0] . DeE-R . vperm2f128 $136, %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [1,1] . DeeeER . vaddps %ymm1, %ymm1, %ymm0
# CHECK-NEXT: [2,0] . DeE-R . vperm2f128 $136, %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [2,1] . DeeeER vaddps %ymm1, %ymm1, %ymm0

# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage

# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 1.0 1.0 0.7 vperm2f128 $136, %ymm0, %ymm0, %ymm1
# CHECK-NEXT: 1. 3 1.0 0.0 0.0 vaddps %ymm1, %ymm1, %ymm0
# CHECK-NEXT: 3 1.0 0.5 0.3 <total>