//=- X86SchedHaswell.td - X86 Haswell Scheduling -------------*- tablegen -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for Haswell to support instruction
// scheduling and other instruction cost heuristics.
//
//===----------------------------------------------------------------------===//

// Top-level machine model record for Haswell.
def HaswellModel : SchedMachineModel {
  // Every x86 instruction is modeled as one micro-op, and the hardware can
  // decode 4 instructions per cycle.
  let IssueWidth = 4;
  // Sized after the reorder buffer.
  let MicroOpBufferSize = 192;
  let LoadLatency = 4;
  let MispredictPenalty = 16;

  // Derived from the LSD (loop-stream detector) queue size and benchmarking
  // data.
  let LoopMicroOpBufferSize = 50;

  // FIXME: SSE4 and AVX are unimplemented. Leaving the model marked as
  // incomplete lets the scheduler fall back to a default model for opcodes
  // that are not recognized here.
  let CompleteModel = 0;
}

let SchedModel = HaswellModel in {

// Haswell can issue micro-ops to 8 different ports in one cycle.

// Ports 0, 1, 5, and 6 handle all computation.
// Port 4 gets the data half of stores. Store data can be available later than
// the store address, but since the latency of stores is not modeled, that can
// be ignored.
// Ports 2 and 3 are identical. They handle loads and the address half of
// stores. Port 7 can handle address calculations.
def HWPort0 : ProcResource<1>;
def HWPort1 : ProcResource<1>;
def HWPort2 : ProcResource<1>;
def HWPort3 : ProcResource<1>;
def HWPort4 : ProcResource<1>;
def HWPort5 : ProcResource<1>;
def HWPort6 : ProcResource<1>;
def HWPort7 : ProcResource<1>;

// Many micro-ops are capable of issuing on multiple ports.
def HWPort01   : ProcResGroup<[HWPort0, HWPort1]>;
def HWPort23   : ProcResGroup<[HWPort2, HWPort3]>;
def HWPort237  : ProcResGroup<[HWPort2, HWPort3, HWPort7]>;
def HWPort04   : ProcResGroup<[HWPort0, HWPort4]>;
def HWPort05   : ProcResGroup<[HWPort0, HWPort5]>;
def HWPort06   : ProcResGroup<[HWPort0, HWPort6]>;
def HWPort15   : ProcResGroup<[HWPort1, HWPort5]>;
def HWPort16   : ProcResGroup<[HWPort1, HWPort6]>;
def HWPort56   : ProcResGroup<[HWPort5, HWPort6]>;
def HWPort015  : ProcResGroup<[HWPort0, HWPort1, HWPort5]>;
def HWPort056  : ProcResGroup<[HWPort0, HWPort5, HWPort6]>;
def HWPort0156 : ProcResGroup<[HWPort0, HWPort1, HWPort5, HWPort6]>;

// 60-entry unified scheduler: a group over all eight ports with a shared
// buffer.
def HWPortAny : ProcResGroup<[HWPort0, HWPort1, HWPort2, HWPort3,
                              HWPort4, HWPort5, HWPort6, HWPort7]> {
  let BufferSize = 60;
}

// Integer division issued on port 0.
def HWDivider : ProcResource<1>;

// Loads take 4 cycles, so ReadAfterLd registers need not be available until
// 4 cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 4>;

// Many SchedWrites come in pairs: one without and one with a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
// This multiclass emits the resource usage for both variants:
//   SchedRW - the foldable write being described.
//   ExePort - the execution resource used by the operation itself.
//   Lat     - latency of the register variant.
multiclass HWWriteResPair<X86FoldableSchedWrite SchedRW,
                          ProcResourceKind ExePort,
                          int Lat> {
  // Register form: a single cycle on ExePort.
  def : WriteRes<SchedRW, [ExePort]> {
    let Latency = Lat;
  }

  // Memory form: additionally uses a cycle on port 2/3 and is 4 cycles
  // slower.
  def : WriteRes<SchedRW.Folded, [HWPort23, ExePort]> {
    let Latency = !add(Lat, 4);
  }
}

// A folded store needs a cycle on port 4 for the store data, but it does not
// need an extra port 2/3 cycle to recompute the address.
def : WriteRes<WriteRMW, [HWPort4]>;

// Stores: address generation on 2/3/7, data on 4.
def : WriteRes<WriteStore, [HWPort237, HWPort4]>;
def : WriteRes<WriteLoad, [HWPort23]> {
  let Latency = 4;
}
def : WriteRes<WriteMove, [HWPort0156]>;
def : WriteRes<WriteZero, []>;

defm : HWWriteResPair<WriteALU,   HWPort0156, 1>;
defm : HWWriteResPair<WriteIMul,  HWPort1,    3>;
def  : WriteRes<WriteIMulH, []> {
  let Latency = 3;
}
defm : HWWriteResPair<WriteShift, HWPort06,   1>;
defm : HWWriteResPair<WriteJump,  HWPort06,   1>;

// Covers simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
// the port to read all inputs. That is not modeled here.
def : WriteRes<WriteLEA, [HWPort15]>;

// Rough approximation: the real latency depends on the dividend.
def : WriteRes<WriteIDiv, [HWPort0, HWDivider]> {
  let Latency = 25;
  let ResourceCycles = [1, 10];
}
def : WriteRes<WriteIDivLd, [HWPort23, HWPort0, HWDivider]> {
  let Latency = 29;
  let ResourceCycles = [1, 1, 10];
}

// Scalar and vector floating point.
defm : HWWriteResPair<WriteFAdd, HWPort1, 3>;
defm : HWWriteResPair<WriteFMul, HWPort0, 5>;
defm : HWWriteResPair<WriteFDiv, HWPort0, 12>; // 10-14 cycles.
defm : HWWriteResPair<WriteFRcp,        HWPort0,   5>;
defm : HWWriteResPair<WriteFRsqrt,      HWPort0,   5>;
defm : HWWriteResPair<WriteFSqrt,       HWPort0,   15>;
defm : HWWriteResPair<WriteCvtF2I,      HWPort1,   3>;
defm : HWWriteResPair<WriteCvtI2F,      HWPort1,   4>;
defm : HWWriteResPair<WriteCvtF2F,      HWPort1,   3>;
defm : HWWriteResPair<WriteFShuffle,    HWPort5,   1>;
defm : HWWriteResPair<WriteFBlend,      HWPort015, 1>;
defm : HWWriteResPair<WriteFShuffle256, HWPort5,   3>;

// FP variable blend, register and folded-load forms.
def : WriteRes<WriteFVarBlend, [HWPort5]> {
  let Latency = 2;
  let ResourceCycles = [2];
}
def : WriteRes<WriteFVarBlendLd, [HWPort5, HWPort23]> {
  let Latency = 6;
  let ResourceCycles = [2, 1];
}

// Vector integer operations.
defm : HWWriteResPair<WriteVecShift,   HWPort0,   1>;
defm : HWWriteResPair<WriteVecLogic,   HWPort015, 1>;
defm : HWWriteResPair<WriteVecALU,     HWPort15,  1>;
defm : HWWriteResPair<WriteVecIMul,    HWPort0,   5>;
defm : HWWriteResPair<WriteShuffle,    HWPort5,   1>;
defm : HWWriteResPair<WriteBlend,      HWPort15,  1>;
defm : HWWriteResPair<WriteShuffle256, HWPort5,   3>;

// Integer variable blend, register and folded-load forms.
def : WriteRes<WriteVarBlend, [HWPort5]> {
  let Latency = 2;
  let ResourceCycles = [2];
}
def : WriteRes<WriteVarBlendLd, [HWPort5, HWPort23]> {
  let Latency = 6;
  let ResourceCycles = [2, 1];
}

// Variable vector shift, register and folded-load forms.
def : WriteRes<WriteVarVecShift, [HWPort0, HWPort5]> {
  let Latency = 2;
  let ResourceCycles = [2, 1];
}
def : WriteRes<WriteVarVecShiftLd, [HWPort0, HWPort5, HWPort23]> {
  let Latency = 6;
  let ResourceCycles = [2, 1, 1];
}

// MPSAD, register and folded-load forms.
def : WriteRes<WriteMPSAD, [HWPort0, HWPort5]> {
  let Latency = 6;
  let ResourceCycles = [1, 2];
}
def : WriteRes<WriteMPSADLd, [HWPort23, HWPort0, HWPort5]> {
  let Latency = 6;
  let ResourceCycles = [1, 1, 2];
}

// String instructions.
// PCMPISTRM: packed compare, implicit-length strings, returns a mask.
def : WriteRes<WritePCmpIStrM, [HWPort0]> {
  let Latency = 10;
  let ResourceCycles = [3];
}
def : WriteRes<WritePCmpIStrMLd, [HWPort0, HWPort23]> {
  let Latency = 10;
  let ResourceCycles = [3, 1];
}

// PCMPESTRM: packed compare, explicit-length strings, returns a mask.
def : WriteRes<WritePCmpEStrM, [HWPort0, HWPort16, HWPort5]> {
  let Latency = 10;
  let ResourceCycles = [3, 2, 4];
}
def : WriteRes<WritePCmpEStrMLd, [HWPort05, HWPort16, HWPort23]> {
  let Latency = 10;
  let ResourceCycles = [6, 2, 1];
}

// PCMPISTRI: packed compare, implicit-length strings, returns an index.
def : WriteRes<WritePCmpIStrI, [HWPort0]> {
  let Latency = 11;
  let ResourceCycles = [3];
}
def : WriteRes<WritePCmpIStrILd, [HWPort0, HWPort23]> {
  let Latency = 11;
  let ResourceCycles = [3, 1];
}

// PCMPESTRI: packed compare, explicit-length strings, returns an index.
def : WriteRes<WritePCmpEStrI, [HWPort05, HWPort16]> {
  let Latency = 11;
  let ResourceCycles = [6, 2];
}
def : WriteRes<WritePCmpEStrILd, [HWPort0, HWPort16, HWPort5, HWPort23]> {
  let Latency = 11;
  let ResourceCycles = [3, 2, 2, 1];
}

// AES instructions.
// Encrypt/decrypt rounds.
def : WriteRes<WriteAESDecEnc, [HWPort5]> {
  let Latency = 7;
  let ResourceCycles = [1];
}
def : WriteRes<WriteAESDecEncLd, [HWPort5, HWPort23]> {
  let Latency = 7;
  let ResourceCycles = [1, 1];
}

// Inverse mix columns.
def : WriteRes<WriteAESIMC, [HWPort5]> {
  let Latency = 14;
  let ResourceCycles = [2];
}
def : WriteRes<WriteAESIMCLd, [HWPort5, HWPort23]> {
  let Latency = 14;
  let ResourceCycles = [2, 1];
}

// Key generation assist.
def : WriteRes<WriteAESKeyGen, [HWPort0, HWPort5]> {
  let Latency = 10;
  let ResourceCycles = [2, 8];
}
def : WriteRes<WriteAESKeyGenLd, [HWPort0, HWPort5, HWPort23]> {
  let Latency = 10;
  let ResourceCycles = [2, 7, 1];
}

// Carry-less multiplication instructions.
def : WriteRes<WriteCLMul, [HWPort0, HWPort5]> {
  let Latency = 7;
  let ResourceCycles = [2, 1];
}
def : WriteRes<WriteCLMulLd, [HWPort0, HWPort5, HWPort23]> {
  let Latency = 7;
  let ResourceCycles = [2, 1, 1];
}

def : WriteRes<WriteSystem, [HWPort0156]> {
  let Latency = 100;
}
def : WriteRes<WriteMicrocoded, [HWPort0156]> {
  let Latency = 100;
}
def : WriteRes<WriteFence, [HWPort23, HWPort4]>;
def : WriteRes<WriteNop, []>;

//================ Exceptions ================//

//-- Specific Scheduling Models --//

// Helper writes are named after the ports they occupy; a leading count is the
// number of cycles on the first port, a "LatN" suffix is the latency and an
// "Ld" suffix marks the folded-load variant.

// Starting with P0.
def WriteP0 : SchedWriteRes<[HWPort0]>;

def WriteP0_P1_Lat4 : SchedWriteRes<[HWPort0, HWPort1]> {
  let Latency = 4;
  let NumMicroOps = 2;
  let ResourceCycles = [1, 1];
}

def WriteP0_P1_Lat4Ld : SchedWriteRes<[HWPort0, HWPort1, HWPort23]> {
  let Latency = 8;
  let NumMicroOps = 3;
  let ResourceCycles = [1, 1, 1];
}

def WriteP01 : SchedWriteRes<[HWPort01]>;

def Write2P01 : SchedWriteRes<[HWPort01]> { let NumMicroOps = 2; }
def Write3P01 : SchedWriteRes<[HWPort01]> { let NumMicroOps = 3; }

def WriteP015 : SchedWriteRes<[HWPort015]>;

def WriteP01_P5 : SchedWriteRes<[HWPort01, HWPort5]> { let NumMicroOps = 2; }
def WriteP06 : SchedWriteRes<[HWPort06]>;

def Write2P06 : SchedWriteRes<[HWPort06]> {
  let Latency = 1;
  let NumMicroOps = 2;
  let ResourceCycles = [2];
}

def Write3P06_Lat2 : SchedWriteRes<[HWPort06]> {
  let Latency = 2;
  let NumMicroOps = 3;
  let ResourceCycles = [3];
}

def WriteP0156_P23 : SchedWriteRes<[HWPort0156, HWPort23]> {
  let NumMicroOps = 2;
}

def Write2P0156_P23 : SchedWriteRes<[HWPort0156, HWPort23]> {
  let NumMicroOps = 3;
  let ResourceCycles = [2, 1];
}

def Write2P0156_Lat2 : SchedWriteRes<[HWPort0156]> {
  let Latency = 2;
  let ResourceCycles = [2];
}
def Write2P0156_Lat2Ld : SchedWriteRes<[HWPort0156, HWPort23]> {
  let Latency = 6;
  let ResourceCycles = [2, 1];
}

def Write5P0156 : SchedWriteRes<[HWPort0156]> {
  let NumMicroOps = 5;
  let ResourceCycles = [5];
}

def WriteP0156_2P237_P4 : SchedWriteRes<[HWPort0156, HWPort237, HWPort4]> {
  let Latency = 1;
  let ResourceCycles = [1, 2, 1];
}

def Write2P0156_2P237_P4 : SchedWriteRes<[HWPort0156, HWPort237, HWPort4]> {
  let Latency = 1;
  let ResourceCycles = [2, 2, 1];
}

def Write3P0156_2P237_P4 : SchedWriteRes<[HWPort0156, HWPort237, HWPort4]> {
  let Latency = 1;
  let ResourceCycles = [3, 2, 1];
}

// Starting with P1.
def WriteP1 : SchedWriteRes<[HWPort1]>;

def WriteP1_P23 : SchedWriteRes<[HWPort1, HWPort23]> { let NumMicroOps = 2; }
def WriteP1_Lat3 : SchedWriteRes<[HWPort1]> { let Latency = 3; }
def WriteP1_Lat3Ld : SchedWriteRes<[HWPort1, HWPort23]> { let Latency = 7; }

def Write2P1 : SchedWriteRes<[HWPort1]> {
  let NumMicroOps = 2;
  let ResourceCycles = [2];
}
def Write2P1_P23 : SchedWriteRes<[HWPort1, HWPort23]> {
  let NumMicroOps = 3;
  let ResourceCycles = [2, 1];
}
def WriteP15 : SchedWriteRes<[HWPort15]>;
def WriteP15Ld : SchedWriteRes<[HWPort15, HWPort23]> { let Latency = 4; }

def WriteP1_P5_Lat4 : SchedWriteRes<[HWPort1, HWPort5]> {
  let Latency = 4;
  let NumMicroOps = 2;
  let ResourceCycles = [1, 1];
}

def WriteP1_P5_Lat4Ld : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> {
  let Latency = 8;
  let NumMicroOps = 3;
  let ResourceCycles = [1, 1, 1];
}

def WriteP1_P5_Lat6 : SchedWriteRes<[HWPort1, HWPort5]> {
  let Latency = 6;
  let NumMicroOps = 2;
  let ResourceCycles = [1, 1];
}

def WriteP1_P5_Lat6Ld : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> {
  let Latency = 10;
  let NumMicroOps = 3;
  let ResourceCycles = [1, 1, 1];
}

// Starting with P2.
def Write2P237_P4 : SchedWriteRes<[HWPort237, HWPort4]> {
  let Latency = 1;
  let ResourceCycles = [2, 1];
}

// Starting with P5.
def WriteP5 : SchedWriteRes<[HWPort5]>;
def WriteP5Ld : SchedWriteRes<[HWPort5, HWPort23]> {
  let Latency = 5;
  let NumMicroOps = 2;
  let ResourceCycles = [1, 1];
}

// Operand notation used in the comments below:
// - r: register.
// - mm: 64 bit mmx register.
// - x = 128 bit xmm register.
// - (x)mm = mmx or xmm register.
// - y = 256 bit ymm register.
// - v = any vector register.
// - m = memory.

//=== Integer Instructions ===//
//-- Move instructions --//

// MOV: r16,m.
def : InstRW<[WriteALULd], (instregex "MOV16rm")>;

// MOVSX, MOVZX: r,m.
def : InstRW<[WriteLoad], (instregex "MOV(S|Z)X32rm(8|16)")>;

// CMOVcc: r,r.
def : InstRW<[Write2P0156_Lat2],
             (instregex
      "CMOV(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)(16|32|64)rr")>;
// CMOVcc: r,m.
def : InstRW<[Write2P0156_Lat2Ld, ReadAfterLd],
             (instregex
      "CMOV(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)(16|32|64)rm")>;

// XCHG: r,r.
def WriteXCHG : SchedWriteRes<[HWPort0156]> {
  let Latency = 2;
  let ResourceCycles = [3];
}

def : InstRW<[WriteXCHG],
             (instregex "XCHG(8|16|32|64)rr", "XCHG(16|32|64)ar")>;

// XCHG: r,m.
def WriteXCHGrm : SchedWriteRes<[]> {
  let Latency = 21;
  let NumMicroOps = 8;
}
def : InstRW<[WriteXCHGrm], (instregex "XCHG(8|16|32|64)rm")>;

// XLAT.
def WriteXLAT : SchedWriteRes<[]> {
  let Latency = 7;
  let NumMicroOps = 3;
}
def : InstRW<[WriteXLAT], (instregex "XLAT")>;

// PUSH: m.
def : InstRW<[Write2P237_P4], (instregex "PUSH(16|32)rmm")>;

// PUSHF.
def WritePushF : SchedWriteRes<[HWPort1, HWPort4, HWPort237, HWPort06]> {
  let NumMicroOps = 4;
}
def : InstRW<[WritePushF], (instregex "PUSHF(16|32)")>;

// PUSHA.
def WritePushA : SchedWriteRes<[]> { let NumMicroOps = 19; }
def : InstRW<[WritePushA], (instregex "PUSHA(16|32)")>;

// POP: m.
def : InstRW<[Write2P237_P4], (instregex "POP(16|32)rmm")>;

// POPF.
def WritePopF : SchedWriteRes<[]> { let NumMicroOps = 9; }
def : InstRW<[WritePopF], (instregex "POPF(16|32)")>;

// POPA.
def WritePopA : SchedWriteRes<[]> { let NumMicroOps = 18; }
def : InstRW<[WritePopA], (instregex "POPA(16|32)")>;

// LAHF SAHF.
def : InstRW<[WriteP06], (instregex "(S|L)AHF")>;

// BSWAP: r32.
def WriteBSwap32 : SchedWriteRes<[HWPort15]>;
def : InstRW<[WriteBSwap32], (instregex "BSWAP32r")>;

// BSWAP: r64.
def WriteBSwap64 : SchedWriteRes<[HWPort06, HWPort15]> {
  let NumMicroOps = 2;
}
def : InstRW<[WriteBSwap64], (instregex "BSWAP64r")>;

// MOVBE: r16,m16 / r64,m64.
def : InstRW<[Write2P0156_Lat2Ld], (instregex "MOVBE(16|64)rm")>;

// MOVBE: r32,m32.
def WriteMoveBE32rm : SchedWriteRes<[HWPort15, HWPort23]> {
  let NumMicroOps = 2;
}
def : InstRW<[WriteMoveBE32rm], (instregex "MOVBE32rm")>;

// MOVBE: m16,r16.
def WriteMoveBE16mr : SchedWriteRes<[HWPort06, HWPort237, HWPort4]> {
  let NumMicroOps = 3;
}
def : InstRW<[WriteMoveBE16mr], (instregex "MOVBE16mr")>;

// MOVBE: m32,r32.
def WriteMoveBE32mr : SchedWriteRes<[HWPort15, HWPort237, HWPort4]> {
  let NumMicroOps = 3;
}
def : InstRW<[WriteMoveBE32mr], (instregex "MOVBE32mr")>;

// MOVBE: m64,r64.
def WriteMoveBE64mr : SchedWriteRes<[HWPort06, HWPort15, HWPort237, HWPort4]> {
  let NumMicroOps = 4;
}
def : InstRW<[WriteMoveBE64mr], (instregex "MOVBE64mr")>;

//-- Arithmetic instructions --//

// ADD SUB: m,r/i.
def : InstRW<[Write2P0156_2P237_P4],
             (instregex "(ADD|SUB)(8|16|32|64)m(r|i)",
                        "(ADD|SUB)(8|16|32|64)mi8",
                        "(ADD|SUB)64mi32")>;

// ADC SBB.
// r,r/i.
def : InstRW<[Write2P0156_Lat2],
             (instregex "(ADC|SBB)(8|16|32|64)r(r|i)",
                        "(ADC|SBB)(16|32|64)ri8",
                        "(ADC|SBB)64ri32",
                        "(ADC|SBB)(8|16|32|64)rr_REV")>;

// ADC SBB: r,m.
def : InstRW<[Write2P0156_Lat2Ld, ReadAfterLd],
             (instregex "(ADC|SBB)(8|16|32|64)rm")>;

// ADC SBB: m,r/i.
def : InstRW<[Write3P0156_2P237_P4],
             (instregex "(ADC|SBB)(8|16|32|64)m(r|i)",
                        "(ADC|SBB)(16|32|64)mi8",
                        "(ADC|SBB)64mi32")>;

// INC DEC NOT NEG: m.
// The second pattern targets the 64-bit-mode duplicates of the 16/32-bit
// memory forms. NOTE(review): those records are presumably spelled with an
// underscore (e.g. INC64_16m), which the previous pattern
// "(INC|DEC)64(16|32)m" could never match; "_?" accepts both spellings.
// Confirm against the instruction definitions.
def : InstRW<[WriteP0156_2P237_P4],
             (instregex "(INC|DEC|NOT|NEG)(8|16|32|64)m",
                        "(INC|DEC)64_?(16|32)m")>;

// MUL IMUL: r16.
def WriteMul16 : SchedWriteRes<[HWPort1, HWPort0156]> {
  let Latency = 4;
  let NumMicroOps = 4;
}
def : InstRW<[WriteMul16], (instregex "IMUL16r", "MUL16r")>;

// MUL IMUL: m16.
def WriteMul16Ld : SchedWriteRes<[HWPort1, HWPort0156, HWPort23]> {
  let Latency = 8;
  let NumMicroOps = 5;
}
def : InstRW<[WriteMul16Ld], (instregex "IMUL16m", "MUL16m")>;

// MUL IMUL: r32.
def WriteMul32 : SchedWriteRes<[HWPort1, HWPort0156]> {
  let Latency = 4;
  let NumMicroOps = 3;
}
def : InstRW<[WriteMul32], (instregex "IMUL32r", "MUL32r")>;

// MUL IMUL: m32.
def WriteMul32Ld : SchedWriteRes<[HWPort1, HWPort0156, HWPort23]> {
  let Latency = 8;
  let NumMicroOps = 4;
}
def : InstRW<[WriteMul32Ld], (instregex "IMUL32m", "MUL32m")>;

// MUL IMUL: r64.
def WriteMul64 : SchedWriteRes<[HWPort1, HWPort6]> {
  let Latency = 3;
  let NumMicroOps = 2;
}
def : InstRW<[WriteMul64], (instregex "IMUL64r", "MUL64r")>;

// MUL IMUL: m64.
def WriteMul64Ld : SchedWriteRes<[HWPort1, HWPort6, HWPort23]> {
  let Latency = 7;
  let NumMicroOps = 3;
}
def : InstRW<[WriteMul64Ld], (instregex "IMUL64m", "MUL64m")>;

// IMUL with immediate: r16,r16.
def WriteMul16rri : SchedWriteRes<[HWPort1, HWPort0156]> {
  let Latency = 4;
  let NumMicroOps = 2;
}
def : InstRW<[WriteMul16rri], (instregex "IMUL16rri", "IMUL16rri8")>;

// r16,m16.
def WriteMul16rmi : SchedWriteRes<[HWPort1, HWPort0156, HWPort23]> {
  let Latency = 8;
  let NumMicroOps = 3;
}
def : InstRW<[WriteMul16rmi], (instregex "IMUL16rmi", "IMUL16rmi8")>;

// MULX: r32,r32,r32.
def WriteMulX32 : SchedWriteRes<[HWPort1, HWPort056]> {
  let Latency = 4;
  let NumMicroOps = 3;
  let ResourceCycles = [1, 2];
}
def : InstRW<[WriteMulX32], (instregex "MULX32rr")>;

// MULX: r32,r32,m32.
def WriteMulX32Ld : SchedWriteRes<[HWPort1, HWPort056, HWPort23]> {
  let Latency = 8;
  let NumMicroOps = 4;
  let ResourceCycles = [1, 2, 1];
}
def : InstRW<[WriteMulX32Ld], (instregex "MULX32rm")>;

// MULX: r64,r64,r64.
def WriteMulX64 : SchedWriteRes<[HWPort1, HWPort6]> {
  let Latency = 4;
  let NumMicroOps = 2;
}
def : InstRW<[WriteMulX64], (instregex "MULX64rr")>;

// MULX: r64,r64,m64.
def WriteMulX64Ld : SchedWriteRes<[HWPort1, HWPort6, HWPort23]> {
  let Latency = 8;
  let NumMicroOps = 3;
}
def : InstRW<[WriteMulX64Ld], (instregex "MULX64rm")>;

// DIV: r8.
def WriteDiv8 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
  let Latency = 22;
  let NumMicroOps = 9;
}
def : InstRW<[WriteDiv8], (instregex "DIV8r")>;

// DIV: r16.
def WriteDiv16 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
  let Latency = 23;
  let NumMicroOps = 10;
}
def : InstRW<[WriteDiv16], (instregex "DIV16r")>;

// DIV: r32.
def WriteDiv32 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
  let Latency = 22;
  let NumMicroOps = 10;
}
def : InstRW<[WriteDiv32], (instregex "DIV32r")>;

// DIV: r64.
def WriteDiv64 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
  let Latency = 32;
  let NumMicroOps = 36;
}
def : InstRW<[WriteDiv64], (instregex "DIV64r")>;

// IDIV.
// r8.
def WriteIDiv8 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
  let Latency = 23;
  let NumMicroOps = 9;
}
def : InstRW<[WriteIDiv8], (instregex "IDIV8r")>;

// IDIV: r16.
def WriteIDiv16 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
  let Latency = 23;
  let NumMicroOps = 10;
}
def : InstRW<[WriteIDiv16], (instregex "IDIV16r")>;

// IDIV: r32.
def WriteIDiv32 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
  let Latency = 22;
  let NumMicroOps = 9;
}
def : InstRW<[WriteIDiv32], (instregex "IDIV32r")>;

// IDIV: r64.
def WriteIDiv64 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
  let Latency = 39;
  let NumMicroOps = 59;
}
def : InstRW<[WriteIDiv64], (instregex "IDIV64r")>;

//-- Logic instructions --//

// AND OR XOR: m,r/i.
def : InstRW<[Write2P0156_2P237_P4],
             (instregex "(AND|OR|XOR)(8|16|32|64)m(r|i)",
                        "(AND|OR|XOR)(8|16|32|64)mi8",
                        "(AND|OR|XOR)64mi32")>;

// SHR SHL SAR: m,i.
def WriteShiftRMW : SchedWriteRes<[HWPort06, HWPort237, HWPort4]> {
  let NumMicroOps = 4;
  let ResourceCycles = [2, 1, 1];
}
def : InstRW<[WriteShiftRMW], (instregex "S(A|H)(R|L)(8|16|32|64)m(i|1)")>;

// SHR SHL SAR: r,cl.
def : InstRW<[Write3P06_Lat2], (instregex "S(A|H)(R|L)(8|16|32|64)rCL")>;

// SHR SHL SAR: m,cl.
def WriteShiftClLdRMW : SchedWriteRes<[HWPort06, HWPort23, HWPort4]> {
  let NumMicroOps = 6;
  let ResourceCycles = [3, 2, 1];
}
def : InstRW<[WriteShiftClLdRMW], (instregex "S(A|H)(R|L)(8|16|32|64)mCL")>;

// ROR ROL: r,1.
def : InstRW<[Write2P06], (instregex "RO(R|L)(8|16|32|64)r1")>;

// ROR ROL: m,i.
def WriteRotateRMW : SchedWriteRes<[HWPort06, HWPort237, HWPort4]> {
  let NumMicroOps = 5;
  let ResourceCycles = [2, 2, 1];
}
def : InstRW<[WriteRotateRMW], (instregex "RO(R|L)(8|16|32|64)mi")>;

// ROR ROL: r,cl.
def : InstRW<[Write3P06_Lat2], (instregex "RO(R|L)(8|16|32|64)rCL")>;

// m,cl.
def WriteRotateRMWCL : SchedWriteRes<[]> { let NumMicroOps = 6; }
def : InstRW<[WriteRotateRMWCL], (instregex "RO(R|L)(8|16|32|64)mCL")>;

// RCR RCL: r,1.
def WriteRCr1 : SchedWriteRes<[HWPort06, HWPort0156]> {
  let Latency = 2;
  let NumMicroOps = 3;
  let ResourceCycles = [2, 1];
}
def : InstRW<[WriteRCr1], (instregex "RC(R|L)(8|16|32|64)r1")>;

// RCR RCL: m,1.
def WriteRCm1 : SchedWriteRes<[]> { let NumMicroOps = 6; }
def : InstRW<[WriteRCm1], (instregex "RC(R|L)(8|16|32|64)m1")>;

// RCR RCL: r,i (the pattern also covers the r,cl forms).
def WriteRCri : SchedWriteRes<[HWPort0156]> {
  let Latency = 6;
  let NumMicroOps = 8;
}
def : InstRW<[WriteRCri], (instregex "RC(R|L)(8|16|32|64)r(i|CL)")>;

// RCR RCL: m,i (the pattern also covers the m,cl forms).
def WriteRCmi : SchedWriteRes<[]> { let NumMicroOps = 11; }
def : InstRW<[WriteRCmi], (instregex "RC(R|L)(8|16|32|64)m(i|CL)")>;

// SHRD SHLD: r,r,i.
def WriteShDrr : SchedWriteRes<[HWPort1]> { let Latency = 3; }
def : InstRW<[WriteShDrr], (instregex "SH(R|L)D(16|32|64)rri8")>;

// SHRD SHLD: m,r,i.
def WriteShDmr : SchedWriteRes<[]> { let NumMicroOps = 5; }
def : InstRW<[WriteShDmr], (instregex "SH(R|L)D(16|32|64)mri8")>;

// SHLD: r,r,cl.
def WriteShlDCL : SchedWriteRes<[HWPort0156]> {
  let Latency = 3;
  let NumMicroOps = 4;
}
def : InstRW<[WriteShlDCL], (instregex "SHLD(16|32|64)rrCL")>;

// SHRD: r,r,cl.
def WriteShrDCL : SchedWriteRes<[HWPort0156]> {
  let Latency = 4;
  let NumMicroOps = 4;
}
def : InstRW<[WriteShrDCL], (instregex "SHRD(16|32|64)rrCL")>;

// SHRD SHLD: m,r,cl.
def WriteShDmrCL : SchedWriteRes<[]> { let NumMicroOps = 7; }
def : InstRW<[WriteShDmrCL], (instregex "SH(R|L)D(16|32|64)mrCL")>;

// BT: r,r/i.
def : InstRW<[WriteShift], (instregex "BT(16|32|64)r(r|i8)")>;

// m,r.
def WriteBTmr : SchedWriteRes<[]> { let NumMicroOps = 10; }
def : InstRW<[WriteBTmr], (instregex "BT(16|32|64)mr")>;

// BT: m,i.
def : InstRW<[WriteShiftLd], (instregex "BT(16|32|64)mi8")>;

// BTR BTS BTC: r,r/i.
def : InstRW<[WriteShift], (instregex "BT(R|S|C)(16|32|64)r(r|i8)")>;

// BTR BTS BTC: m,r.
def WriteBTRSCmr : SchedWriteRes<[]> { let NumMicroOps = 11; }
def : InstRW<[WriteBTRSCmr], (instregex "BT(R|S|C)(16|32|64)mr")>;

// BTR BTS BTC: m,i.
def : InstRW<[WriteShiftLd], (instregex "BT(R|S|C)(16|32|64)mi8")>;

// BSF BSR: r,r.
def : InstRW<[WriteP1_Lat3], (instregex "BS(R|F)(16|32|64)rr")>;
// BSF BSR: r,m.
def : InstRW<[WriteP1_Lat3Ld], (instregex "BS(R|F)(16|32|64)rm")>;

// SETcc: r.
def : InstRW<[WriteShift],
             (instregex "SET(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)r")>;
// SETcc: m.
def WriteSetCCm : SchedWriteRes<[HWPort06, HWPort237, HWPort4]> {
  let NumMicroOps = 3;
}
def : InstRW<[WriteSetCCm],
             (instregex "SET(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)m")>;

// CLD STD.
def WriteCldStd : SchedWriteRes<[HWPort15, HWPort6]> {
  let NumMicroOps = 3;
}
def : InstRW<[WriteCldStd], (instregex "STD", "CLD")>;

// LZCNT TZCNT.
// The alternation must only cover the leading letter: the earlier spelling
// "(L|TZCNT)" chose between "L" and "TZCNT", so LZCNT was never matched.
// r,r.
def : InstRW<[WriteP1_Lat3], (instregex "(L|T)ZCNT(16|32|64)rr")>;
// r,m.
def : InstRW<[WriteP1_Lat3Ld], (instregex "(L|T)ZCNT(16|32|64)rm")>;

// ANDN: r,r.
def : InstRW<[WriteP15], (instregex "ANDN(32|64)rr")>;
// ANDN: r,m.
def : InstRW<[WriteP15Ld], (instregex "ANDN(32|64)rm")>;

// BLSI BLSMSK BLSR: r,r.
def : InstRW<[WriteP15], (instregex "BLS(I|MSK|R)(32|64)rr")>;
// BLSI BLSMSK BLSR: r,m.
def : InstRW<[WriteP15Ld], (instregex "BLS(I|MSK|R)(32|64)rm")>;

// BEXTR: r,r,r.
def : InstRW<[Write2P0156_Lat2], (instregex "BEXTR(32|64)rr")>;
// BEXTR: r,m,r.
def : InstRW<[Write2P0156_Lat2Ld], (instregex "BEXTR(32|64)rm")>;

// BZHI.
// r,r,r.
def : InstRW<[WriteP15], (instregex "BZHI(32|64)rr")>;
// BZHI: r,m,r.
def : InstRW<[WriteP15Ld], (instregex "BZHI(32|64)rm")>;

// PDEP PEXT: r,r,r.
def : InstRW<[WriteP1_Lat3], (instregex "PDEP(32|64)rr", "PEXT(32|64)rr")>;
// PDEP PEXT: r,m,r.
def : InstRW<[WriteP1_Lat3Ld], (instregex "PDEP(32|64)rm", "PEXT(32|64)rm")>;

//-- Control transfer instructions --//

// J(E|R)CXZ.
def WriteJCXZ : SchedWriteRes<[HWPort0156, HWPort6]> {
  let NumMicroOps = 2;
}
def : InstRW<[WriteJCXZ], (instregex "JCXZ", "JECXZ_(32|64)", "JRCXZ")>;

// LOOP.
def WriteLOOP : SchedWriteRes<[]> { let NumMicroOps = 7; }
def : InstRW<[WriteLOOP], (instregex "LOOP")>;

// LOOP(N)E
def WriteLOOPE : SchedWriteRes<[]> { let NumMicroOps = 11; }
def : InstRW<[WriteLOOPE], (instregex "LOOPE", "LOOPNE")>;

// CALL: r.
def WriteCALLr : SchedWriteRes<[HWPort237, HWPort4, HWPort6]> {
  let NumMicroOps = 3;
}
def : InstRW<[WriteCALLr], (instregex "CALL(16|32)r")>;

// CALL: m.
def WriteCALLm : SchedWriteRes<[HWPort237, HWPort4, HWPort6]> {
  let NumMicroOps = 4;
  let ResourceCycles = [2, 1, 1];
}
def : InstRW<[WriteCALLm], (instregex "CALL(16|32)m")>;

// RET.
def WriteRET : SchedWriteRes<[HWPort237, HWPort6]> {
  let NumMicroOps = 2;
}
def : InstRW<[WriteRET], (instregex "RET(L|Q|W)", "LRET(L|Q|W)")>;

// RET: i.
def WriteRETI : SchedWriteRes<[HWPort23, HWPort6, HWPort015]> {
  let NumMicroOps = 4;
  let ResourceCycles = [1, 2, 1];
}
def : InstRW<[WriteRETI], (instregex "RETI(L|Q|W)", "LRETI(L|Q|W)")>;

// BOUND: r,m.
def WriteBOUND : SchedWriteRes<[]> { let NumMicroOps = 15; }
def : InstRW<[WriteBOUND], (instregex "BOUNDS(16|32)rm")>;

// INTO.
def WriteINTO : SchedWriteRes<[]> { let NumMicroOps = 4; }
def : InstRW<[WriteINTO], (instregex "INTO")>;

//-- String instructions --//

// LODSB/W.
def : InstRW<[Write2P0156_P23], (instregex "LODS(B|W)")>;

// LODSD/Q.
def : InstRW<[WriteP0156_P23], (instregex "LODS(L|Q)")>;

// STOS.
def WriteSTOS : SchedWriteRes<[HWPort23, HWPort0156, HWPort4]> {
  let NumMicroOps = 3;
}
def : InstRW<[WriteSTOS], (instregex "STOS(B|L|Q|W)")>;

// MOVS.
def WriteMOVS : SchedWriteRes<[HWPort23, HWPort4, HWPort0156]> {
  let Latency = 4;
  let NumMicroOps = 5;
  let ResourceCycles = [2, 1, 2];
}
def : InstRW<[WriteMOVS], (instregex "MOVS(B|L|Q|W)")>;

// SCAS.
def : InstRW<[Write2P0156_P23], (instregex "SCAS(B|W|L|Q)")>;

// CMPS.
def WriteCMPS : SchedWriteRes<[HWPort23, HWPort0156]> {
  let Latency = 4;
  let NumMicroOps = 5;
  let ResourceCycles = [2, 3];
}
def : InstRW<[WriteCMPS], (instregex "CMPS(B|L|Q|W)")>;

//-- Synchronization instructions --//

// XADD.
def WriteXADD : SchedWriteRes<[]> { let NumMicroOps = 5; }
def : InstRW<[WriteXADD], (instregex "XADD(8|16|32|64)rm")>;

// CMPXCHG.
def WriteCMPXCHG : SchedWriteRes<[]> { let NumMicroOps = 6; }
def : InstRW<[WriteCMPXCHG], (instregex "CMPXCHG(8|16|32|64)rm")>;

// CMPXCHG8B.
def WriteCMPXCHG8B : SchedWriteRes<[]> { let NumMicroOps = 15; }
def : InstRW<[WriteCMPXCHG8B], (instregex "CMPXCHG8B")>;

// CMPXCHG16B.
def WriteCMPXCHG16B : SchedWriteRes<[]> { let NumMicroOps = 22; }
def : InstRW<[WriteCMPXCHG16B], (instregex "CMPXCHG16B")>;

//-- Other --//

// PAUSE.
def WritePAUSE : SchedWriteRes<[HWPort05, HWPort6]> {
  let NumMicroOps = 5;
  let ResourceCycles = [1, 3];
}
def : InstRW<[WritePAUSE], (instregex "PAUSE")>;

// LEAVE.
def : InstRW<[Write2P0156_P23], (instregex "LEAVE")>;

// XGETBV.
def WriteXGETBV : SchedWriteRes<[]> { let NumMicroOps = 8; }
def : InstRW<[WriteXGETBV], (instregex "XGETBV")>;

// RDTSC.
def WriteRDTSC : SchedWriteRes<[]> { let NumMicroOps = 15; }
def : InstRW<[WriteRDTSC], (instregex "RDTSC")>;

// RDPMC.
def WriteRDPMC : SchedWriteRes<[]> { let NumMicroOps = 34; }
def : InstRW<[WriteRDPMC], (instregex "RDPMC")>;

// RDRAND.
def WriteRDRAND : SchedWriteRes<[HWPort23, HWPort015]> {
  let NumMicroOps = 17;
  let ResourceCycles = [1, 16];
}
def : InstRW<[WriteRDRAND], (instregex "RDRAND(16|32|64)r")>;

//=== Floating Point x87 Instructions ===//
//-- Move instructions --//

// FLD: r.
def : InstRW<[WriteP01], (instregex "LD_Frr")>;

// FLD: m80.
def WriteLD_F80m : SchedWriteRes<[HWPort01, HWPort23]> {
  let Latency = 4;
  let NumMicroOps = 4;
  let ResourceCycles = [2, 2];
}
def : InstRW<[WriteLD_F80m], (instregex "LD_F80m")>;

// FBLD: m80.
def WriteFBLD : SchedWriteRes<[]> {
  let Latency = 47;
  let NumMicroOps = 43;
}
def : InstRW<[WriteFBLD], (instregex "FBLDm")>;

// FST(P): r.
def : InstRW<[WriteP01], (instregex "ST_(F|FP)rr")>;

// FST(P): m80.
def WriteST_FP80m : SchedWriteRes<[HWPort0156, HWPort23, HWPort4]> {
  let NumMicroOps = 7;
  let ResourceCycles = [3, 2, 2];
}
def : InstRW<[WriteST_FP80m], (instregex "ST_FP80m")>;

// FBSTP: m80.
def WriteFBSTP : SchedWriteRes<[]> { let NumMicroOps = 226; }
def : InstRW<[WriteFBSTP], (instregex "FBSTPm")>;

// FXCHG.
def : InstRW<[WriteNop], (instregex "XCH_F")>;

// FILD.
def WriteFILD : SchedWriteRes<[HWPort01, HWPort23]> {
  let Latency = 6;
  let NumMicroOps = 2;
}
def : InstRW<[WriteFILD], (instregex "ILD_F(16|32|64)m")>;

// FIST(P) FISTTP.
// NOTE(review): the pattern below only names the 16/32-bit IST_F / IST_FP
// forms; 64-bit and ISTT_* opcodes are not matched. Confirm whether that is
// intentional.
def WriteFIST : SchedWriteRes<[HWPort1, HWPort23, HWPort4]> {
  let Latency = 7;
  let NumMicroOps = 3;
}
def : InstRW<[WriteFIST], (instregex "IST_(F|FP)(16|32)m")>;

// FLDZ.
def : InstRW<[WriteP01], (instregex "LD_F0")>;

// FLD1.
def : InstRW<[Write2P01], (instregex "LD_F1")>;

// FLDPI FLDL2E etc.
// Each pattern must be its own instregex argument: adjacent string literals
// without a separating comma are concatenated into a single pattern.
def : InstRW<[Write2P01], (instregex "FLDPI", "FLDL2(T|E)", "FLDL(G|N)2")>;

// FCMOVcc.
// Three micro-ops: two on port 0 and one on port 5.
def WriteFCMOVcc : SchedWriteRes<[HWPort0, HWPort5]> {
  let Latency = 2;
  let NumMicroOps = 3;
  let ResourceCycles = [2, 1];
}
// Covers all eight condition codes, including the plain "E" form.
def : InstRW<[WriteFCMOVcc], (instregex "CMOV(B|BE|E|P|NB|NBE|NE|NP)_F")>;

// FNSTSW.
// AX.
def WriteFNSTSW : SchedWriteRes<[HWPort0, HWPort0156]> {
  let NumMicroOps = 2;
}
def : InstRW<[WriteFNSTSW], (instregex "FNSTSW16r")>;

// m16.
def WriteFNSTSWm : SchedWriteRes<[HWPort0, HWPort4, HWPort237]> {
  let Latency = 6;
  let NumMicroOps = 3;
}
def : InstRW<[WriteFNSTSWm], (instregex "FNSTSWm")>;

// FLDCW.
def WriteFLDCW : SchedWriteRes<[HWPort01, HWPort23, HWPort6]> {
  let Latency = 7;
  let NumMicroOps = 3;
}
def : InstRW<[WriteFLDCW], (instregex "FLDCW16m")>;

// FNSTCW.
def WriteFNSTCW : SchedWriteRes<[HWPort237, HWPort4, HWPort6]> {
  let NumMicroOps = 3;
}
def : InstRW<[WriteFNSTCW], (instregex "FNSTCW16m")>;

// FINCSTP FDECSTP.
def : InstRW<[WriteP01], (instregex "FINCSTP", "FDECSTP")>;

// FFREE.
def : InstRW<[WriteP01], (instregex "FFREE")>;

// FNSAVE.
// No ports are listed; only the micro-op count is modeled.
def WriteFNSAVE : SchedWriteRes<[]> {
  let NumMicroOps = 147;
}
def : InstRW<[WriteFNSAVE], (instregex "FSAVEm")>;

// FRSTOR.
// No ports are listed; only the micro-op count is modeled.
def WriteFRSTOR : SchedWriteRes<[]> {
  let NumMicroOps = 90;
}
def : InstRW<[WriteFRSTOR], (instregex "FRSTORm")>;

//-- Arithmetic instructions --//

// FABS.
def : InstRW<[WriteP0], (instregex "ABS_F")>;

// FCHS.
def : InstRW<[WriteP0], (instregex "CHS_F")>;

// FCOM(P) FUCOM(P).
// r.
def : InstRW<[WriteP1], (instregex "COM_FST0r", "COMP_FST0r", "UCOM_Fr",
                                   "UCOM_FPr")>;
// m.
def : InstRW<[WriteP1_P23], (instregex "FCOM(32|64)m", "FCOMP(32|64)m")>;

// FCOMPP FUCOMPP.
// r.
def : InstRW<[Write2P01], (instregex "FCOMPP", "UCOM_FPPr")>;

// FCOMI(P) FUCOMI(P).
// r.
def : InstRW<[Write3P01], (instregex "COM_FIr", "COM_FIPr", "UCOM_FIr",
                                     "UCOM_FIPr")>;

// FICOM(P).
def : InstRW<[Write2P1_P23], (instregex "FICOM(16|32)m", "FICOMP(16|32)m")>;

// FTST.
def : InstRW<[WriteP1], (instregex "TST_F")>;

// FXAM.
def : InstRW<[Write2P1], (instregex "FXAM")>;

// FPREM.
// No ports are listed; only latency and micro-op count are modeled.
def WriteFPREM : SchedWriteRes<[]> {
  let Latency = 19;
  let NumMicroOps = 28;
}
// instregex patterns are prefix matches, so the pattern is anchored with '$'
// to keep it from also capturing FPREM1, which has its own entry below.
def : InstRW<[WriteFPREM], (instregex "FPREM$")>;

// FPREM1.
def WriteFPREM1 : SchedWriteRes<[]> {
  let Latency = 27;
  let NumMicroOps = 41;
}
def : InstRW<[WriteFPREM1], (instregex "FPREM1")>;

// FRNDINT.
def WriteFRNDINT : SchedWriteRes<[]> {
  let Latency = 11;
  let NumMicroOps = 17;
}
def : InstRW<[WriteFRNDINT], (instregex "FRNDINT")>;

//-- Math instructions --//

// FSCALE.
def WriteFSCALE : SchedWriteRes<[]> {
  let Latency = 75; // 49-125
  let NumMicroOps = 50; // 25-75
}
def : InstRW<[WriteFSCALE], (instregex "FSCALE")>;

// FXTRACT.
def WriteFXTRACT : SchedWriteRes<[]> {
  let Latency = 15;
  let NumMicroOps = 17;
}
def : InstRW<[WriteFXTRACT], (instregex "FXTRACT")>;

//-- Other instructions --//

// FNOP.
def : InstRW<[WriteP01], (instregex "FNOP")>;

// WAIT.
def : InstRW<[Write2P01], (instregex "WAIT")>;

// FNCLEX.
def : InstRW<[Write5P0156], (instregex "FNCLEX")>;

// FNINIT.
// No ports are listed; only the micro-op count is modeled.
def WriteFNINIT : SchedWriteRes<[]> { let NumMicroOps = 26; }
def : InstRW<[WriteFNINIT], (instregex "FNINIT")>;

//=== Integer MMX and XMM Instructions ===//
//-- Move instructions --//

// MOVD.
// r32/64 <- (x)mm.
def : InstRW<[WriteP0],
             (instregex "MMX_MOVD64grr", "MMX_MOVD64from64rr",
                        "VMOVPDI2DIrr", "MOVPDI2DIrr")>;

// (x)mm <- r32/64.
def : InstRW<[WriteP5],
             (instregex "MMX_MOVD64rr", "MMX_MOVD64to64rr",
                        "VMOVDI2PDIrr", "MOVDI2PDIrr")>;

// MOVQ.
// r64 <- (x)mm.
def : InstRW<[WriteP0], (instregex "VMOVPQIto64rr")>;

// (x)mm <- r64.
def : InstRW<[WriteP5], (instregex "VMOV64toPQIrr", "VMOVZQI2PQIrr")>;

// (x)mm <- (x)mm.
def : InstRW<[WriteP015], (instregex "MMX_MOVQ64rr")>;

// (V)MOVDQA/U.
// x <- x.
def : InstRW<[WriteP015],
             (instregex "MOVDQ(A|U)rr", "VMOVDQ(A|U)rr",
                        "MOVDQ(A|U)rr_REV", "VMOVDQ(A|U)rr_REV",
                        "VMOVDQ(A|U)Yrr", "VMOVDQ(A|U)Yrr_REV")>;

// MOVDQ2Q.
def : InstRW<[WriteP01_P5], (instregex "MMX_MOVDQ2Qrr")>;

// MOVQ2DQ.
def : InstRW<[WriteP015], (instregex "MMX_MOVQ2DQrr")>;


// PACKSSWB/DW.
// mm <- mm.
// Three micro-ops, all on port 5.
def WriteMMXPACKSSrr : SchedWriteRes<[HWPort5]> {
  let NumMicroOps = 3;
  let ResourceCycles = [3];
  let Latency = 2;
}
def : InstRW<[WriteMMXPACKSSrr],
             (instregex "MMX_PACKSSDWirr", "MMX_PACKSSWBirr",
                        "MMX_PACKUSWBirr")>;

// mm <- m64.
def WriteMMXPACKSSrm : SchedWriteRes<[HWPort23, HWPort5]> {
  let NumMicroOps = 3;
  let ResourceCycles = [1, 3];
  let Latency = 4;
}
def : InstRW<[WriteMMXPACKSSrm],
             (instregex "MMX_PACKSSDWirm", "MMX_PACKSSWBirm",
                        "MMX_PACKUSWBirm")>;

// VPMOVSX/ZX BW BD BQ DW DQ.
// y <- x.
// NOTE(review): the heading lists BW BD BQ DW DQ, but the pattern below has
// no "BD" alternative; confirm whether the omission is intentional.
def WriteVPMOVSX : SchedWriteRes<[HWPort5]> {
  let NumMicroOps = 1;
  let Latency = 3;
}
def : InstRW<[WriteVPMOVSX], (instregex "VPMOV(SX|ZX)(BW|BQ|DW|DQ)Yrr")>;

// PBLENDW.
// x,x,i / v,v,v,i
// Port 5 only; all other fields keep their defaults.
def WritePBLENDWr : SchedWriteRes<[HWPort5]>;
def : InstRW<[WritePBLENDWr], (instregex "(V?)PBLENDW(Y?)rri")>;

// x,m,i / v,v,m,i
def WritePBLENDWm : SchedWriteRes<[HWPort5, HWPort23]> {
  let Latency = 4;
  let NumMicroOps = 2;
  let ResourceCycles = [1, 1];
}
def : InstRW<[WritePBLENDWm, ReadAfterLd], (instregex "(V?)PBLENDW(Y?)rmi")>;

// VPBLENDD.
// v,v,v,i.
// Any of ports 0/1/5; all other fields keep their defaults.
def WriteVPBLENDDr : SchedWriteRes<[HWPort015]>;
def : InstRW<[WriteVPBLENDDr], (instregex "VPBLENDD(Y?)rri")>;

// v,v,m,i
def WriteVPBLENDDm : SchedWriteRes<[HWPort015, HWPort23]> {
  let Latency = 4;
  let NumMicroOps = 2;
  let ResourceCycles = [1, 1];
}
def : InstRW<[WriteVPBLENDDm, ReadAfterLd], (instregex "VPBLENDD(Y?)rmi")>;

// MASKMOVQ.
def WriteMASKMOVQ : SchedWriteRes<[HWPort0, HWPort4, HWPort23]> {
  let NumMicroOps = 4;
  let ResourceCycles = [1, 1, 2];
  let Latency = 13;
}
def : InstRW<[WriteMASKMOVQ], (instregex "MMX_MASKMOVQ(64)?")>;

// MASKMOVDQU.
def WriteMASKMOVDQU : SchedWriteRes<[HWPort04, HWPort56, HWPort23]> {
  let NumMicroOps = 10;
  let ResourceCycles = [4, 2, 4];
  let Latency = 14;
}
def : InstRW<[WriteMASKMOVDQU], (instregex "(V?)MASKMOVDQU(64)?")>;

// VPMASKMOV D/Q.
// v,v,m.
def WriteVPMASKMOVr : SchedWriteRes<[HWPort5, HWPort23]> {
  let NumMicroOps = 3;
  let ResourceCycles = [2, 1];
  let Latency = 4;
}
def : InstRW<[WriteVPMASKMOVr, ReadAfterLd],
             (instregex "VPMASKMOV(D|Q)(Y?)rm")>;

// m, v,v.
def WriteVPMASKMOVm : SchedWriteRes<[HWPort0, HWPort1, HWPort4, HWPort23]> {
  let NumMicroOps = 4;
  let ResourceCycles = [1, 1, 1, 1];
  let Latency = 13;
}
def : InstRW<[WriteVPMASKMOVm], (instregex "VPMASKMOV(D|Q)(Y?)mr")>;

// PMOVMSKB.
def WritePMOVMSKB : SchedWriteRes<[HWPort0]> { let Latency = 3; }
def : InstRW<[WritePMOVMSKB], (instregex "(V|MMX_)?PMOVMSKB(Y?)rr")>;

// PEXTR B/W/D/Q.
// r32,x,i.
def WritePEXTRr : SchedWriteRes<[HWPort0, HWPort5]> {
  let NumMicroOps = 2;
  let ResourceCycles = [1, 1];
  let Latency = 2;
}
def : InstRW<[WritePEXTRr], (instregex "PEXTR(B|W|D|Q)rr", "MMX_PEXTRWirri")>;

// m8,x,i.
def WritePEXTRm : SchedWriteRes<[HWPort23, HWPort4, HWPort5]> {
  let ResourceCycles = [1, 1, 1];
  let NumMicroOps = 3;
}
def : InstRW<[WritePEXTRm], (instregex "PEXTR(B|W|D|Q)mr")>;

// VPBROADCAST B/W.
// x, m8/16.
def WriteVPBROADCAST128Ld : SchedWriteRes<[HWPort01, HWPort23, HWPort5]> {
  let NumMicroOps = 3;
  let ResourceCycles = [1, 1, 1];
  let Latency = 5;
}
def : InstRW<[WriteVPBROADCAST128Ld, ReadAfterLd],
             (instregex "VPBROADCAST(B|W)rm")>;

// y, m8/16
def WriteVPBROADCAST256Ld : SchedWriteRes<[HWPort01, HWPort23, HWPort5]> {
  let NumMicroOps = 3;
  let ResourceCycles = [1, 1, 1];
  let Latency = 7;
}
def : InstRW<[WriteVPBROADCAST256Ld, ReadAfterLd],
             (instregex "VPBROADCAST(B|W)Yrm")>;

// VPGATHERDD.
// The gather writes list no ports; only the micro-op count is modeled.
// x.
def WriteVPGATHERDD128 : SchedWriteRes<[]> { let NumMicroOps = 20; }
def : InstRW<[WriteVPGATHERDD128, ReadAfterLd], (instregex "VPGATHERDDrm")>;

// y.
def WriteVPGATHERDD256 : SchedWriteRes<[]> { let NumMicroOps = 34; }
def : InstRW<[WriteVPGATHERDD256, ReadAfterLd], (instregex "VPGATHERDDYrm")>;

// VPGATHERQD.
// x.
// The gather writes list no ports; only the micro-op count is modeled.
def WriteVPGATHERQD128 : SchedWriteRes<[]> {
  let NumMicroOps = 15;
}
def : InstRW<[WriteVPGATHERQD128, ReadAfterLd], (instregex "VPGATHERQDrm")>;

// y.
def WriteVPGATHERQD256 : SchedWriteRes<[]> {
  let NumMicroOps = 22;
}
def : InstRW<[WriteVPGATHERQD256, ReadAfterLd], (instregex "VPGATHERQDYrm")>;

// VPGATHERDQ.
// x.
def WriteVPGATHERDQ128 : SchedWriteRes<[]> {
  let NumMicroOps = 12;
}
def : InstRW<[WriteVPGATHERDQ128, ReadAfterLd], (instregex "VPGATHERDQrm")>;

// y.
def WriteVPGATHERDQ256 : SchedWriteRes<[]> {
  let NumMicroOps = 20;
}
def : InstRW<[WriteVPGATHERDQ256, ReadAfterLd], (instregex "VPGATHERDQYrm")>;

// VPGATHERQQ.
// x.
def WriteVPGATHERQQ128 : SchedWriteRes<[]> {
  let NumMicroOps = 14;
}
def : InstRW<[WriteVPGATHERQQ128, ReadAfterLd], (instregex "VPGATHERQQrm")>;

// y.
def WriteVPGATHERQQ256 : SchedWriteRes<[]> {
  let NumMicroOps = 22;
}
def : InstRW<[WriteVPGATHERQQ256, ReadAfterLd], (instregex "VPGATHERQQYrm")>;

//-- Arithmetic instructions --//

// PHADD|PHSUB (S) W/D.
// v <- v,v.
// Three micro-ops: one on port 1 and two on port 5.
def WritePHADDSUBr : SchedWriteRes<[HWPort1, HWPort5]> {
  let Latency = 3;
  let NumMicroOps = 3;
  let ResourceCycles = [1, 2];
}
def : InstRW<[WritePHADDSUBr], (instregex "MMX_PHADD(W?)rr64",
                                          "MMX_PHADDSWrr64",
                                          "MMX_PHSUB(W|D)rr64",
                                          "MMX_PHSUBSWrr64",
                                          "(V?)PH(ADD|SUB)(W|D)(Y?)rr",
                                          "(V?)PH(ADD|SUB)SWrr(256)?")>;

// v <- v,m.
// The three micro-ops of the register form plus one load micro-op on ports
// 2/3, so the micro-op count agrees with the ResourceCycles total.
def WritePHADDSUBm : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> {
  let Latency = 6;
  let NumMicroOps = 4;
  let ResourceCycles = [1, 2, 1];
}
def : InstRW<[WritePHADDSUBm, ReadAfterLd],
              (instregex "MMX_PHADD(W?)rm64",
                         "MMX_PHADDSWrm64",
                         "MMX_PHSUB(W|D)rm64",
                         "MMX_PHSUBSWrm64",
                         "(V?)PH(ADD|SUB)(W|D)(Y?)rm",
                         "(V?)PH(ADD|SUB)SWrm(128|256)?")>;

// PCMPGTQ.
// v <- v,v.
def WritePCMPGTQr : SchedWriteRes<[HWPort0]> {
  let Latency = 5;
  let NumMicroOps = 1;
}
def : InstRW<[WritePCMPGTQr], (instregex "(V?)PCMPGTQ(Y?)rr")>;

// v <- v,m.
def WritePCMPGTQm : SchedWriteRes<[HWPort0, HWPort23]> {
  let Latency = 5;
  let NumMicroOps = 2;
  let ResourceCycles = [1, 1];
}
def : InstRW<[WritePCMPGTQm, ReadAfterLd], (instregex "(V?)PCMPGTQ(Y?)rm")>;

// PMULLD.
// x,x / y,y,y.
// Two micro-ops, both on port 0.
def WritePMULLDr : SchedWriteRes<[HWPort0]> {
  let Latency = 10;
  let NumMicroOps = 2;
  let ResourceCycles = [2];
}
def : InstRW<[WritePMULLDr], (instregex "(V?)PMULLD(Y?)rr")>;

// x,m / y,y,m.
def WritePMULLDm : SchedWriteRes<[HWPort0, HWPort23]> {
  let Latency = 10;
  let NumMicroOps = 3;
  let ResourceCycles = [2, 1];
}
def : InstRW<[WritePMULLDm, ReadAfterLd], (instregex "(V?)PMULLD(Y?)rm")>;

//-- Logic instructions --//

// PTEST.
// v,v.
def WritePTESTr : SchedWriteRes<[HWPort0, HWPort5]> {
  let Latency = 2;
  let NumMicroOps = 2;
  let ResourceCycles = [1, 1];
}
def : InstRW<[WritePTESTr], (instregex "(V?)PTEST(Y?)rr")>;

// v,m.
// Adds a load micro-op on ports 2/3 to the register form.
def WritePTESTm : SchedWriteRes<[HWPort0, HWPort5, HWPort23]> {
  let Latency = 6;
  let NumMicroOps = 3;
  let ResourceCycles = [1, 1, 1];
}
// The memory forms use the memory write defined just above.
def : InstRW<[WritePTESTm], (instregex "(V?)PTEST(Y?)rm")>;

// PSLL,PSRL,PSRA W/D/Q.
// x,x / v,v,x.
def WritePShift : SchedWriteRes<[HWPort0, HWPort5]> {
  let Latency = 2;
  let NumMicroOps = 2;
  let ResourceCycles = [1, 1];
}
def : InstRW<[WritePShift], (instregex "(V?)PS(LL|RL|RA)(W|D|Q)(Y?)rr")>;

// PSLL,PSRL DQ.
def : InstRW<[WriteP5], (instregex "(V?)PS(R|L)LDQ(Y?)ri")>;

//-- Other --//

// EMMS.
// No ports are listed; only latency and micro-op count are modeled.
def WriteEMMS : SchedWriteRes<[]> {
  let NumMicroOps = 31;
  let Latency = 13;
}
def : InstRW<[WriteEMMS], (instregex "MMX_EMMS")>;

//=== Floating Point XMM and YMM Instructions ===//
//-- Move instructions --//

// MOVMSKP S/D.
// r32 <- x.
def WriteMOVMSKPr : SchedWriteRes<[HWPort0]> { let Latency = 3; }
def : InstRW<[WriteMOVMSKPr], (instregex "(V?)MOVMSKP(S|D)rr")>;

// r32 <- y.
def WriteVMOVMSKPYr : SchedWriteRes<[HWPort0]> { let Latency = 2; }
def : InstRW<[WriteVMOVMSKPYr], (instregex "VMOVMSKP(S|D)Yrr")>;

// VPERM2F128.
def : InstRW<[WriteFShuffle256], (instregex "VPERM2F128rr")>;
def : InstRW<[WriteFShuffle256Ld, ReadAfterLd], (instregex "VPERM2F128rm")>;

// BLENDVP S/D.
def : InstRW<[WriteFVarBlend], (instregex "BLENDVP(S|D)rr0")>;
def : InstRW<[WriteFVarBlendLd, ReadAfterLd], (instregex "BLENDVP(S|D)rm0")>;

// VBROADCASTF128.
def : InstRW<[WriteLoad], (instregex "VBROADCASTF128")>;

// EXTRACTPS.
// r32,x,i.
def WriteEXTRACTPSr : SchedWriteRes<[HWPort0, HWPort5]> {
  let ResourceCycles = [1, 1];
  let NumMicroOps = 2;
}
def : InstRW<[WriteEXTRACTPSr], (instregex "(V?)EXTRACTPSrr")>;

// m32,x,i.
def WriteEXTRACTPSm : SchedWriteRes<[HWPort0, HWPort5, HWPort23]> {
  let NumMicroOps = 3;
  let ResourceCycles = [1, 1, 1];
  let Latency = 4;
}
def : InstRW<[WriteEXTRACTPSm], (instregex "(V?)EXTRACTPSmr")>;

// VEXTRACTF128.
// x,y,i.
def : InstRW<[WriteFShuffle256], (instregex "VEXTRACTF128rr")>;

// m128,y,i.
// One micro-op on ports 2/3 and one on the store-data port 4.
def WriteVEXTRACTF128m : SchedWriteRes<[HWPort23, HWPort4]> {
  let NumMicroOps = 2;
  let ResourceCycles = [1, 1];
  let Latency = 4;
}
def : InstRW<[WriteVEXTRACTF128m], (instregex "VEXTRACTF128mr")>;

// VINSERTF128.
// y,y,x,i.
def : InstRW<[WriteFShuffle256], (instregex "VINSERTF128rr")>;

// y,y,m128,i.
// One micro-op on any of ports 0/1/5 plus a load micro-op on ports 2/3.
def WriteVINSERTF128m : SchedWriteRes<[HWPort015, HWPort23]> {
  let Latency = 4;
  let NumMicroOps = 2;
  let ResourceCycles = [1, 1];
}
// The memory form uses the dedicated write defined just above rather than
// the register-form shuffle write.
def : InstRW<[WriteVINSERTF128m, ReadAfterLd], (instregex "VINSERTF128rm")>;

// VMASKMOVP S/D.
// v,v,m.
def WriteVMASKMOVPrm : SchedWriteRes<[HWPort5, HWPort23]> {
  let Latency = 4;
  let NumMicroOps = 3;
  let ResourceCycles = [2, 1];
}
def : InstRW<[WriteVMASKMOVPrm], (instregex "VMASKMOVP(S|D)(Y?)rm")>;

// m128,x,x.
def WriteVMASKMOVPmr : SchedWriteRes<[HWPort0, HWPort1, HWPort4, HWPort23]> {
  let Latency = 13;
  let NumMicroOps = 4;
  let ResourceCycles = [1, 1, 1, 1];
}
def : InstRW<[WriteVMASKMOVPmr], (instregex "VMASKMOVP(S|D)mr")>;

// m256,y,y.
def WriteVMASKMOVPYmr : SchedWriteRes<[HWPort0, HWPort1, HWPort4, HWPort23]> {
  let Latency = 14;
  let NumMicroOps = 4;
  let ResourceCycles = [1, 1, 1, 1];
}
def : InstRW<[WriteVMASKMOVPYmr], (instregex "VMASKMOVP(S|D)Ymr")>;

// VGATHERDPS.
// The gather writes list no ports; only the micro-op count is modeled.
// x.
def WriteVGATHERDPS128 : SchedWriteRes<[]> {
  let NumMicroOps = 20;
}
def : InstRW<[WriteVGATHERDPS128, ReadAfterLd], (instregex "VGATHERDPSrm")>;

// y.
def WriteVGATHERDPS256 : SchedWriteRes<[]> {
  let NumMicroOps = 34;
}
def : InstRW<[WriteVGATHERDPS256, ReadAfterLd], (instregex "VGATHERDPSYrm")>;

// VGATHERQPS.
// x.
def WriteVGATHERQPS128 : SchedWriteRes<[]> {
  let NumMicroOps = 15;
}
def : InstRW<[WriteVGATHERQPS128, ReadAfterLd], (instregex "VGATHERQPSrm")>;

// y.
def WriteVGATHERQPS256 : SchedWriteRes<[]> {
  let NumMicroOps = 22;
}
def : InstRW<[WriteVGATHERQPS256, ReadAfterLd], (instregex "VGATHERQPSYrm")>;

// VGATHERDPD.
// x.
// The gather writes list no ports; only the micro-op count is modeled.
def WriteVGATHERDPD128 : SchedWriteRes<[]> { let NumMicroOps = 12; }
def : InstRW<[WriteVGATHERDPD128, ReadAfterLd], (instregex "VGATHERDPDrm")>;

// y.
def WriteVGATHERDPD256 : SchedWriteRes<[]> { let NumMicroOps = 20; }
def : InstRW<[WriteVGATHERDPD256, ReadAfterLd], (instregex "VGATHERDPDYrm")>;

// VGATHERQPD.
// x.
def WriteVGATHERQPD128 : SchedWriteRes<[]> { let NumMicroOps = 14; }
def : InstRW<[WriteVGATHERQPD128, ReadAfterLd], (instregex "VGATHERQPDrm")>;

// y.
def WriteVGATHERQPD256 : SchedWriteRes<[]> { let NumMicroOps = 22; }
def : InstRW<[WriteVGATHERQPD256, ReadAfterLd], (instregex "VGATHERQPDYrm")>;

//-- Conversion instructions --//

// CVTPD2PS.
// x,x.
def : InstRW<[WriteP1_P5_Lat4], (instregex "(V?)CVTPD2PSrr")>;

// x,m128.
def : InstRW<[WriteP1_P5_Lat4Ld], (instregex "(V?)CVTPD2PS(X?)rm")>;

// x,y.
def WriteCVTPD2PSYrr : SchedWriteRes<[HWPort1, HWPort5]> {
  let NumMicroOps = 2;
  let ResourceCycles = [1, 1];
  let Latency = 5;
}
def : InstRW<[WriteCVTPD2PSYrr], (instregex "(V?)CVTPD2PSYrr")>;

// x,m256.
def WriteCVTPD2PSYrm : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> {
  let NumMicroOps = 3;
  let ResourceCycles = [1, 1, 1];
  let Latency = 9;
}
def : InstRW<[WriteCVTPD2PSYrm], (instregex "(V?)CVTPD2PSYrm")>;

// CVTSD2SS.
// x,x.
def : InstRW<[WriteP1_P5_Lat4], (instregex "(Int_)?(V)?CVTSD2SSrr")>;

// x,m64.
def : InstRW<[WriteP1_P5_Lat4Ld], (instregex "(Int_)?(V)?CVTSD2SSrm")>;

// CVTPS2PD.
// x,x.
def WriteCVTPS2PDrr : SchedWriteRes<[HWPort0, HWPort5]> {
  let NumMicroOps = 2;
  let ResourceCycles = [1, 1];
  let Latency = 2;
}
def : InstRW<[WriteCVTPS2PDrr], (instregex "(V?)CVTPS2PDrr")>;

// x,m64.
// y,m128.
def WriteCVTPS2PDrm : SchedWriteRes<[HWPort0, HWPort23]> {
  let NumMicroOps = 2;
  let ResourceCycles = [1, 1];
  let Latency = 5;
}
def : InstRW<[WriteCVTPS2PDrm], (instregex "(V?)CVTPS2PD(Y?)rm")>;

// y,x.
def WriteVCVTPS2PDYrr : SchedWriteRes<[HWPort0, HWPort5]> {
  let NumMicroOps = 2;
  let ResourceCycles = [1, 1];
  let Latency = 5;
}
def : InstRW<[WriteVCVTPS2PDYrr], (instregex "VCVTPS2PDYrr")>;

// CVTSS2SD.
// x,x.
def WriteCVTSS2SDrr : SchedWriteRes<[HWPort0, HWPort5]> {
  let NumMicroOps = 2;
  let ResourceCycles = [1, 1];
  let Latency = 2;
}
def : InstRW<[WriteCVTSS2SDrr], (instregex "(Int_)?(V?)CVTSS2SDrr")>;

// x,m32.
def WriteCVTSS2SDrm : SchedWriteRes<[HWPort0, HWPort23]> {
  let NumMicroOps = 2;
  let ResourceCycles = [1, 1];
  let Latency = 5;
}
def : InstRW<[WriteCVTSS2SDrm], (instregex "(Int_)?(V?)CVTSS2SDrm")>;

// CVTDQ2PD.
// x,x.
def : InstRW<[WriteP1_P5_Lat4], (instregex "(V)?CVTDQ2PDrr")>;

// y,x.
def : InstRW<[WriteP1_P5_Lat6], (instregex "VCVTDQ2PDYrr")>;

// CVT(T)PD2DQ.
// x,x.
def : InstRW<[WriteP1_P5_Lat4], (instregex "(V?)CVT(T?)PD2DQrr")>;
// x,m128.
def : InstRW<[WriteP1_P5_Lat4Ld], (instregex "(V?)CVT(T?)PD2DQrm")>;
// x,y.
def : InstRW<[WriteP1_P5_Lat6], (instregex "VCVT(T?)PD2DQYrr")>;
// x,m256.
def : InstRW<[WriteP1_P5_Lat6Ld], (instregex "VCVT(T?)PD2DQYrm")>;

// CVT(T)PS2PI.
// mm,x.
def : InstRW<[WriteP1_P5_Lat4], (instregex "MMX_CVT(T?)PS2PIirr")>;

// CVTPI2PD.
// x,mm.
def : InstRW<[WriteP1_P5_Lat4], (instregex "MMX_CVT(T?)PI2PDirr")>;

// CVT(T)PD2PI.
// mm,x.
def : InstRW<[WriteP1_P5_Lat4], (instregex "MMX_CVT(T?)PD2PIirr")>;

// CVTSI2SS.
// x,r32.
def : InstRW<[WriteP1_P5_Lat4], (instregex "(Int_)?(V?)CVT(T?)SI2SS(64)?rr")>;

// CVT(T)SS2SI.
// r32,x.
def : InstRW<[WriteP0_P1_Lat4], (instregex "(Int_)?(V?)CVT(T?)SS2SI(64)?rr")>;
// r32,m32.
def : InstRW<[WriteP0_P1_Lat4Ld], (instregex "(Int_)?(V?)CVT(T?)SS2SI(64)?rm")>;

// CVTSI2SD.
// x,r32/64.
// Matches the SD opcodes named in the heading; the SS opcodes are handled by
// the separate CVTSI2SS entry.
def : InstRW<[WriteP0_P1_Lat4], (instregex "(Int_)?(V?)CVTSI2SD(64)?rr")>;

// CVT(T)SD2SI.
// r32/64,x.
def : InstRW<[WriteP0_P1_Lat4], (instregex "(Int_)?(V?)CVT(T?)SD2SI(64)?rr")>;
// r32/64,m64.
def : InstRW<[WriteP0_P1_Lat4Ld], (instregex "(Int_)?(V?)CVT(T?)SD2SI(64)?rm")>;

// VCVTPS2PH.
// x,v,i.
def : InstRW<[WriteP1_P5_Lat4], (instregex "VCVTPS2PH(Y?)rr")>;
// m,v,i.
def : InstRW<[WriteP1_P5_Lat4Ld, WriteRMW], (instregex "VCVTPS2PH(Y?)mr")>;

// VCVTPH2PS.
// v,x.
def : InstRW<[WriteP1_P5_Lat4], (instregex "VCVTPH2PS(Y?)rr")>;

//-- Arithmetic instructions --//

// HADD, HSUB PS/PD
// x,x / v,v,v.
// Three micro-ops: one on port 1 and two on port 5.
def WriteHADDSUBPr : SchedWriteRes<[HWPort1, HWPort5]> {
  let Latency = 5;
  let NumMicroOps = 3;
  let ResourceCycles = [1, 2];
}
def : InstRW<[WriteHADDSUBPr], (instregex "(V?)H(ADD|SUB)P(S|D)(Y?)rr")>;

// x,m / v,v,m.
def WriteHADDSUBPm : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> {
  let Latency = 9;
  let NumMicroOps = 4;
  let ResourceCycles = [1, 2, 1];
}
def : InstRW<[WriteHADDSUBPm], (instregex "(V?)H(ADD|SUB)P(S|D)(Y?)rm")>;

// MUL SS/SD PS/PD.
// x,x / v,v,v.
def WriteMULr : SchedWriteRes<[HWPort01]> {
  let Latency = 5;
}
def : InstRW<[WriteMULr], (instregex "(V?)MUL(P|S)(S|D)rr")>;

// x,m / v,v,m.
def WriteMULm : SchedWriteRes<[HWPort01, HWPort23]> {
  let Latency = 9;
  let NumMicroOps = 2;
  let ResourceCycles = [1, 1];
}
def : InstRW<[WriteMULm], (instregex "(V?)MUL(P|S)(S|D)rm")>;

// VDIVPS.
// y,y,y.
def WriteVDIVPSYrr : SchedWriteRes<[HWPort0, HWPort15]> {
  let Latency = 19; // 18-21 cycles.
  let NumMicroOps = 3;
  let ResourceCycles = [2, 1];
}
def : InstRW<[WriteVDIVPSYrr], (instregex "VDIVPSYrr")>;

// y,y,m256.
def WriteVDIVPSYrm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> {
  let Latency = 23; // 18-21 + 4 cycles.
  let NumMicroOps = 4;
  let ResourceCycles = [2, 1, 1];
}
def : InstRW<[WriteVDIVPSYrm, ReadAfterLd], (instregex "VDIVPSYrm")>;

// VDIVPD.
// y,y,y.
def WriteVDIVPDYrr : SchedWriteRes<[HWPort0, HWPort15]> {
  let Latency = 27; // 19-35 cycles.
  let NumMicroOps = 3;
  let ResourceCycles = [2, 1];
}
def : InstRW<[WriteVDIVPDYrr], (instregex "VDIVPDYrr")>;

// y,y,m256.
def WriteVDIVPDYrm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> {
  let Latency = 31; // 19-35 + 4 cycles.
  let NumMicroOps = 4;
  let ResourceCycles = [2, 1, 1];
}
def : InstRW<[WriteVDIVPDYrm, ReadAfterLd], (instregex "VDIVPDYrm")>;

// VRCPPS.
// y,y.
def WriteVRCPPSr : SchedWriteRes<[HWPort0, HWPort15]> {
  let Latency = 7;
  let NumMicroOps = 3;
  let ResourceCycles = [2, 1];
}
def : InstRW<[WriteVRCPPSr], (instregex "VRCPPSYr(_Int)?")>;

// y,m256.
def WriteVRCPPSm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> {
  let Latency = 11;
  let NumMicroOps = 4;
  let ResourceCycles = [2, 1, 1];
}
def : InstRW<[WriteVRCPPSm], (instregex "VRCPPSYm(_Int)?")>;

// ROUND SS/SD PS/PD.
// v,v,i.
// Two micro-ops, both on port 1.
def WriteROUNDr : SchedWriteRes<[HWPort1]> {
  let Latency = 6;
  let NumMicroOps = 2;
  let ResourceCycles = [2];
}
def : InstRW<[WriteROUNDr], (instregex "(V?)ROUND(Y?)(S|P)(S|D)r(_Int)?")>;

// v,m,i.
def WriteROUNDm : SchedWriteRes<[HWPort1, HWPort23]> {
  let Latency = 10;
  let NumMicroOps = 3;
  let ResourceCycles = [2, 1];
}
def : InstRW<[WriteROUNDm], (instregex "(V?)ROUND(Y?)(S|P)(S|D)m(_Int)?")>;

// DPPS.
// x,x,i / v,v,v,i.
// Four micro-ops: two on port 0, one on port 1, one on port 5.
def WriteDPPSr : SchedWriteRes<[HWPort0, HWPort1, HWPort5]> {
  let NumMicroOps = 4;
  let ResourceCycles = [2, 1, 1];
  let Latency = 14;
}
def : InstRW<[WriteDPPSr], (instregex "(V?)DPPS(Y?)rri")>;

// x,m,i / v,v,m,i.
def WriteDPPSm : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort23, HWPort6]> {
  let NumMicroOps = 6;
  let ResourceCycles = [2, 1, 1, 1, 1];
  let Latency = 18;
}
def : InstRW<[WriteDPPSm, ReadAfterLd], (instregex "(V?)DPPS(Y?)rmi")>;

// DPPD.
// x,x,i.
def WriteDPPDr : SchedWriteRes<[HWPort0, HWPort1, HWPort5]> {
  let NumMicroOps = 3;
  let ResourceCycles = [1, 1, 1];
  let Latency = 9;
}
def : InstRW<[WriteDPPDr], (instregex "(V?)DPPDrri")>;

// x,m,i.
def WriteDPPDm : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort23]> {
  let NumMicroOps = 4;
  let ResourceCycles = [1, 1, 1, 1];
  let Latency = 13;
}
def : InstRW<[WriteDPPDm], (instregex "(V?)DPPDrmi")>;

// VFMADD.
// v,v,v.
// A single micro-op on port 0 or port 1.
def WriteFMADDr : SchedWriteRes<[HWPort01]> {
  let NumMicroOps = 1;
  let Latency = 5;
}
def : InstRW<[WriteFMADDr],
             (instregex
                // 3-operand packed forms.
                "VF(N?)M(ADD|SUB|ADDSUB|SUBADD)P(S|D)(r213|r132|r231)r(Y)?",
                // 3-operand scalar forms.
                "VF(N?)M(ADD|SUB)S(S|D)(r132|r231|r213)r",
                // 4-operand scalar forms, including the _REV/_Int variants.
                "VF(N?)M(ADD|SUB)S(S|D)4rr(_REV|_Int)?",
                // 4-operand packed forms.
                "VF(N?)M(ADD|SUB)P(S|D)4rr(Y)?(_REV)?")>;

// v,v,m.
def WriteFMADDm : SchedWriteRes<[HWPort01, HWPort23]> {
  let NumMicroOps = 2;
  let ResourceCycles = [1, 1];
  let Latency = 9;
}
def : InstRW<[WriteFMADDm],
             (instregex
                // 3-operand packed forms.
                "VF(N?)M(ADD|SUB|ADDSUB|SUBADD)P(S|D)(r213|r132|r231)m(Y)?",
                // 3-operand scalar forms.
                "VF(N?)M(ADD|SUB)S(S|D)(r132|r231|r213)m",
                // 4-operand scalar forms, including the _Int variants.
                "VF(N?)M(ADD|SUB)S(S|D)4(rm|mr)(_Int)?",
                // 4-operand packed forms.
                "VF(N?)M(ADD|SUB)P(S|D)4(rm|mr)(Y)?")>;

//-- Math instructions --//

// VSQRTPS.
// y,y.
def WriteVSQRTPSYr : SchedWriteRes<[HWPort0, HWPort15]> {
  let NumMicroOps = 3;
  let ResourceCycles = [2, 1];
  let Latency = 19;
}
def : InstRW<[WriteVSQRTPSYr], (instregex "VSQRTPSYr")>;

// y,m256.
def WriteVSQRTPSYm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> {
  let NumMicroOps = 4;
  let ResourceCycles = [2, 1, 1];
  let Latency = 23;
}
def : InstRW<[WriteVSQRTPSYm], (instregex "VSQRTPSYm")>;

// VSQRTPD.
// y,y.
def WriteVSQRTPDYr : SchedWriteRes<[HWPort0, HWPort15]> {
  let NumMicroOps = 3;
  let ResourceCycles = [2, 1];
  let Latency = 28;
}
def : InstRW<[WriteVSQRTPDYr], (instregex "VSQRTPDYr")>;

// y,m256.
def WriteVSQRTPDYm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> {
  let NumMicroOps = 4;
  let ResourceCycles = [2, 1, 1];
  let Latency = 32;
}
def : InstRW<[WriteVSQRTPDYm], (instregex "VSQRTPDYm")>;

// RSQRT SS/PS.
// x,x.
def WriteRSQRTr : SchedWriteRes<[HWPort0]> { let Latency = 5; }
def : InstRW<[WriteRSQRTr], (instregex "(V?)RSQRT(SS|PS)r(_Int)?")>;

// x,m128.
def WriteRSQRTm : SchedWriteRes<[HWPort0, HWPort23]> {
  let NumMicroOps = 2;
  let ResourceCycles = [1, 1];
  let Latency = 9;
}
def : InstRW<[WriteRSQRTm], (instregex "(V?)RSQRT(SS|PS)m(_Int)?")>;

// RSQRTPS 256.
// y,y.
def WriteRSQRTPSYr : SchedWriteRes<[HWPort0, HWPort15]> {
  let NumMicroOps = 3;
  let ResourceCycles = [2, 1];
  let Latency = 7;
}
def : InstRW<[WriteRSQRTPSYr], (instregex "VRSQRTPSYr(_Int)?")>;

// y,m256.
def WriteRSQRTPSYm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> {
  let NumMicroOps = 4;
  let ResourceCycles = [2, 1, 1];
  let Latency = 11;
}
def : InstRW<[WriteRSQRTPSYm], (instregex "VRSQRTPSYm(_Int)?")>;

//-- Logic instructions --//

// AND, ANDN, OR, XOR PS/PD.
// x,x / v,v,v.
def : InstRW<[WriteP5], (instregex "(V?)(AND|ANDN|OR|XOR)P(S|D)(Y?)rr")>;
// x,m / v,v,m.
def : InstRW<[WriteP5Ld, ReadAfterLd],
             (instregex "(V?)(AND|ANDN|OR|XOR)P(S|D)(Y?)rm")>;

//-- Other instructions --//

// VZEROUPPER.
// No ports are listed; only the micro-op count is modeled.
def WriteVZEROUPPER : SchedWriteRes<[]> { let NumMicroOps = 4; }
def : InstRW<[WriteVZEROUPPER], (instregex "VZEROUPPER")>;

// VZEROALL.
// No ports are listed; only the micro-op count is modeled.
def WriteVZEROALL : SchedWriteRes<[]> { let NumMicroOps = 12; }
def : InstRW<[WriteVZEROALL], (instregex "VZEROALL")>;

// LDMXCSR.
def WriteLDMXCSR : SchedWriteRes<[HWPort0, HWPort6, HWPort23]> {
  let NumMicroOps = 3;
  let ResourceCycles = [1, 1, 1];
  let Latency = 6;
}
def : InstRW<[WriteLDMXCSR], (instregex "(V)?LDMXCSR")>;

// STMXCSR.
def WriteSTMXCSR : SchedWriteRes<[HWPort0, HWPort4, HWPort6, HWPort237]> {
  let NumMicroOps = 4;
  let ResourceCycles = [1, 1, 1, 1];
  let Latency = 7;
}
def : InstRW<[WriteSTMXCSR], (instregex "(V)?STMXCSR")>;

} // SchedModel