//=- AArch64SchedM1.td - Samsung Exynos-M1 Scheduling Defs ---*- tablegen -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for Samsung Exynos-M1 to support
// instruction scheduling and other instruction cost heuristics.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// The Exynos-M1 is a traditional superscalar microprocessor with a
// 4-wide in-order stage for decode and dispatch and a wider issue stage.
// The execution units and loads and stores are out-of-order.

def ExynosM1Model : SchedMachineModel {
  // At most 4 micro-ops per cycle.
  let IssueWidth = 4;
  // Sized after the reorder buffer.
  let MicroOpBufferSize = 96;
  // Derived from the instruction queue size.
  let LoopMicroOpBufferSize = 24;
  // Load latency for the optimistic cases.
  let LoadLatency = 4;
  // Lower bound of the branch misprediction penalty.
  let MispredictPenalty = 14;
  // Instructions not described here fall back to the default model.
  let CompleteModel = 0;
}

//===----------------------------------------------------------------------===//
// Define each kind of processor resource and number available on the Exynos-M1,
// which has 9 pipelines, each with its own queue with out-of-order dispatch.

// Integer, branch and memory pipelines.
def M1UnitA : ProcResource<2>; // Simple integer
def M1UnitC : ProcResource<1>; // Simple and complex integer
def M1UnitB : ProcResource<2>; // Branch
def M1UnitL : ProcResource<1>; // Load
def M1UnitS : ProcResource<1>; // Store

// The two FP pipes; every unit further down names one of them as its Super.
def M1PipeF0 : ProcResource<1>; // FP #0
def M1PipeF1 : ProcResource<1>; // FP #1

// Units whose Super is FP pipe #0.
def M1UnitFMAC   : ProcResource<1> { let Super = M1PipeF0; } // FP multiplication
def M1UnitFCVT   : ProcResource<1> { let Super = M1PipeF0; } // FP conversion
def M1UnitNAL0   : ProcResource<1> { let Super = M1PipeF0; } // Simple vector.
def M1UnitNMISC  : ProcResource<1> { let Super = M1PipeF0; } // Miscellanea
def M1UnitNCRYPT : ProcResource<1> { let Super = M1PipeF0; } // Cryptographic

// Units whose Super is FP pipe #1.
def M1UnitFADD : ProcResource<1> { let Super = M1PipeF1; } // Simple FP
def M1UnitFVAR : ProcResource<1> { // FP division & square root (serialized)
  let Super      = M1PipeF1;
  let BufferSize = 1;
}
def M1UnitNAL1 : ProcResource<1> { let Super = M1PipeF1; } // Simple vector.
def M1UnitFST  : ProcResource<1> { let Super = M1PipeF1; } // FP store

// Resource groups, both attached to the Exynos-M1 model.
def M1UnitALU : ProcResGroup<[M1UnitA, M1UnitC]> { // All simple integer.
  let SchedModel = ExynosM1Model;
}
def M1UnitNALU : ProcResGroup<[M1UnitNAL0, M1UnitNAL1]> { // All simple vector.
  let SchedModel = ExynosM1Model;
}

let SchedModel = ExynosM1Model in {

//===----------------------------------------------------------------------===//
// Coarse scheduling model for the Exynos-M1.

// Branch instructions.
// TODO: Non-conditional direct branches take zero cycles and units.
let Latency = 1 in {
  def : WriteRes<WriteBr,    [M1UnitB]>;
  def : WriteRes<WriteBrReg, [M1UnitC]>;
}
// TODO: Branch and link is much different.

// Arithmetic and logical integer instructions.
let Latency = 1 in {
  def : WriteRes<WriteI,     [M1UnitALU]>;
  // TODO: Shift over 3 and some extensions take 2 cycles.
  def : WriteRes<WriteISReg, [M1UnitALU]>;
  def : WriteRes<WriteIEReg, [M1UnitALU]>;
  def : WriteRes<WriteIS,    [M1UnitALU]>;
}

// Move instructions.
let Latency = 1 in def : WriteRes<WriteImm, [M1UnitALU]>;

// Divide and multiply instructions.
// TODO: Division blocks the divider inside C.
let Latency = 13 in def : WriteRes<WriteID32, [M1UnitC]>;
let Latency = 21 in def : WriteRes<WriteID64, [M1UnitC]>;
// TODO: Long multiplication takes 5 cycles and also the ALU.
// TODO: Multiplication with accumulation can be advanced.
let Latency = 3 in def : WriteRes<WriteIM32, [M1UnitC]>;
// TODO: 64-bit multiplication has a throughput of 1/2.
let Latency = 4 in def : WriteRes<WriteIM64, [M1UnitC]>;

// Miscellaneous instructions.
let Latency = 2 in def : WriteRes<WriteExtr, [M1UnitALU, M1UnitALU]>;

// TODO: The latency for the post or pre register is 1 cycle.
let Latency = 0 in def : WriteRes<WriteAdr, []>;

// Load instructions.
let Latency = 4 in def : WriteRes<WriteLD, [M1UnitL]>;
// TODO: Extended address requires also the ALU.
let Latency = 5 in def : WriteRes<WriteLDIdx, [M1UnitL]>;
let Latency = 4 in def : WriteRes<WriteLDHi, [M1UnitALU]>;

// Store instructions.
let Latency = 1 in {
  def : WriteRes<WriteST,    [M1UnitS]>;
  // TODO: Extended address requires also the ALU.
  def : WriteRes<WriteSTIdx, [M1UnitS]>;
  def : WriteRes<WriteSTP,   [M1UnitS]>;
  def : WriteRes<WriteSTX,   [M1UnitS]>;
}

// FP data instructions.
let Latency = 3 in def : WriteRes<WriteF, [M1UnitFADD]>;
// TODO: FCCMP is much different.
let Latency = 4 in def : WriteRes<WriteFCmp, [M1UnitNMISC]>;
// TODO: DP takes longer.
let Latency = 15 in def : WriteRes<WriteFDiv, [M1UnitFVAR]>;
// TODO: MACC takes longer.
let Latency = 4 in def : WriteRes<WriteFMul, [M1UnitFMAC]>;

// FP miscellaneous instructions.
// TODO: Conversion between register files is much different.
let Latency = 3 in def : WriteRes<WriteFCvt, [M1UnitFCVT]>;
let Latency = 1 in def : WriteRes<WriteFImm, [M1UnitNALU]>;
// TODO: Copy from FPR to GPR is much different.
let Latency = 4 in def : WriteRes<WriteFCopy, [M1UnitS]>;

// FP load instructions.
// TODO: ASIMD loads are much different.
let Latency = 5 in def : WriteRes<WriteVLD, [M1UnitL]>;

// FP store instructions.
// TODO: ASIMD stores are much different.
let Latency = 1 in def : WriteRes<WriteVST, [M1UnitS, M1UnitFST]>;

// ASIMD FP instructions.
// TODO: Other operations are much different.
let Latency = 3 in def : WriteRes<WriteV, [M1UnitFADD]>;

// Other miscellaneous instructions.
let Unsupported = 1 in def : WriteRes<WriteAtomic, []>;
let Latency = 1 in {
  def : WriteRes<WriteBarrier, []>;
  def : WriteRes<WriteHint,    []>;
  def : WriteRes<WriteSys,     []>;
}

//===----------------------------------------------------------------------===//
// Generic fast forwarding.

// TODO: Add FP register forwarding rules.

def : ReadAdvance<ReadI,       0>;
def : ReadAdvance<ReadISReg,   0>;
def : ReadAdvance<ReadIEReg,   0>;
def : ReadAdvance<ReadIM,      0>;
// Integer multiply-accumulate.
// TODO: The forwarding for WriteIM64 actually saves 3 cycles.
def : ReadAdvance<ReadIMA,     2, [WriteIM32, WriteIM64]>;
def : ReadAdvance<ReadID,      0>;
def : ReadAdvance<ReadExtrHi,  0>;
def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadVLD,     0>;

//===----------------------------------------------------------------------===//
// Finer scheduling model for the Exynos-M1.

// Writes that occupy more than one unit.
let Latency = 9 in
def M1WriteNEONA : SchedWriteRes<[M1UnitNALU, M1UnitNALU, M1UnitFADD]>;
let Latency = 5 in
def M1WriteNEONB : SchedWriteRes<[M1UnitNALU, M1UnitFST]>;
let Latency = 6 in
def M1WriteNEONC : SchedWriteRes<[M1UnitNALU, M1UnitFST]>;
let Latency = 10 in
def M1WriteNEOND : SchedWriteRes<[M1UnitNALU, M1UnitFST, M1UnitL]>;
let Latency = 8 in
def M1WriteNEONE : SchedWriteRes<[M1UnitFCVT, M1UnitFST]>;
let Latency = 13 in
def M1WriteNEONF : SchedWriteRes<[M1UnitFCVT, M1UnitFST, M1UnitL]>;
let Latency = 6 in
def M1WriteNEONG : SchedWriteRes<[M1UnitNMISC, M1UnitFST]>;
let Latency = 3 in
def M1WriteNEONH : SchedWriteRes<[M1UnitNALU, M1UnitFST]>;
let Latency = 9 in
def M1WriteNEONI : SchedWriteRes<[M1UnitFST, M1UnitL]>;
let Latency = 6 in
def M1WriteNEONJ : SchedWriteRes<[M1UnitNMISC, M1UnitFMAC]>;
let Latency = 7 in
def M1WriteNEONK : SchedWriteRes<[M1UnitNMISC, M1UnitFMAC]>;

// Integer ALU and branch writes.
let Latency = 1 in {
  def M1WriteALU1 : SchedWriteRes<[M1UnitALU]>;
  def M1WriteB    : SchedWriteRes<[M1UnitB]>;
  // FIXME: This is the worst case, conditional branch and link.
  def M1WriteBL   : SchedWriteRes<[M1UnitB, M1UnitALU]>;
}
// FIXME: This is the worst case, when using LR.
let Latency = 2 in
def M1WriteBLR : SchedWriteRes<[M1UnitB, M1UnitALU, M1UnitALU]>;

// Single-unit writes; the trailing number in each name equals its Latency.
let Latency = 1  in def M1WriteC1      : SchedWriteRes<[M1UnitC]>;
let Latency = 2  in def M1WriteC2      : SchedWriteRes<[M1UnitC]>;
let Latency = 3  in def M1WriteFADD3   : SchedWriteRes<[M1UnitFADD]>;
let Latency = 3  in def M1WriteFCVT3   : SchedWriteRes<[M1UnitFCVT]>;
let Latency = 4  in def M1WriteFCVT4   : SchedWriteRes<[M1UnitFCVT]>;
let Latency = 4  in def M1WriteFMAC4   : SchedWriteRes<[M1UnitFMAC]>;
let Latency = 5  in def M1WriteFMAC5   : SchedWriteRes<[M1UnitFMAC]>;
let Latency = 15 in def M1WriteFVAR15  : SchedWriteRes<[M1UnitFVAR]>;
let Latency = 23 in def M1WriteFVAR23  : SchedWriteRes<[M1UnitFVAR]>;
let Latency = 1  in def M1WriteNALU1   : SchedWriteRes<[M1UnitNALU]>;
let Latency = 2  in def M1WriteNALU2   : SchedWriteRes<[M1UnitNALU]>;
// The three writes below are pinned to M1UnitNAL1 rather than the NALU group.
let Latency = 1  in def M1WriteNAL11   : SchedWriteRes<[M1UnitNAL1]>;
let Latency = 2  in def M1WriteNAL12   : SchedWriteRes<[M1UnitNAL1]>;
let Latency = 3  in def M1WriteNAL13   : SchedWriteRes<[M1UnitNAL1]>;
let Latency = 1  in def M1WriteNCRYPT1 : SchedWriteRes<[M1UnitNCRYPT]>;
let Latency = 5  in def M1WriteNCRYPT5 : SchedWriteRes<[M1UnitNCRYPT]>;
let Latency = 1  in def M1WriteNMISC1  : SchedWriteRes<[M1UnitNMISC]>;
let Latency = 2  in def M1WriteNMISC2  : SchedWriteRes<[M1UnitNMISC]>;
let Latency = 3  in def M1WriteNMISC3  : SchedWriteRes<[M1UnitNMISC]>;
let Latency = 4  in def M1WriteNMISC4  : SchedWriteRes<[M1UnitNMISC]>;
let Latency = 4  in def M1WriteS4      : SchedWriteRes<[M1UnitS]>;

// Test-and-branch: uses the C unit plus one ALU.
let Latency = 2 in
def M1WriteTB : SchedWriteRes<[M1UnitC, M1UnitALU]>;

// Branch instructions
def : InstRW<[M1WriteB ],  (instrs Bcc)>;
def : InstRW<[M1WriteBL],  (instrs BL)>;
def : InstRW<[M1WriteBLR], (instrs BLR)>;
def : InstRW<[M1WriteC1],  (instregex "^CBN?Z[WX]")>;
def : InstRW<[M1WriteTB],  (instregex "^TBN?Z[WX]")>;

// Arithmetic and logical integer instructions.
// Register COPY is given the single-cycle ALU write.
def : InstRW<[M1WriteALU1], (instrs COPY)>;

// Divide and multiply instructions.
// (No InstRW entries are listed under this heading.)

// Miscellaneous instructions.
// (No InstRW entries are listed under this heading.)

// Load instructions.
// (No InstRW entries are listed under this heading.)

// Store instructions.
// (No InstRW entries are listed under this heading.)

// FP data instructions.
// Each entry binds the opcodes selected by name (instrs) or by regular
// expression (instregex) to one of the M1Write* resources defined earlier.
def : InstRW<[M1WriteNALU1], (instregex "^F(ABS|NEG)[DS]r")>;
def : InstRW<[M1WriteFADD3], (instregex "^F(ADD|SUB)[DS]rr")>;
def : InstRW<[M1WriteNEONG], (instregex "^FCCMPE?[DS]rr")>;
def : InstRW<[M1WriteNMISC4], (instregex "^FCMPE?[DS]r")>;
// Division and square root: the single-precision forms get the 15-cycle
// write, the double-precision forms the 23-cycle one.
def : InstRW<[M1WriteFVAR15], (instrs FDIVSrr)>;
def : InstRW<[M1WriteFVAR23], (instrs FDIVDrr)>;
def : InstRW<[M1WriteNMISC2], (instregex "^F(MAX|MIN).+rr")>;
def : InstRW<[M1WriteFMAC4], (instregex "^FN?MUL[DS]rr")>;
def : InstRW<[M1WriteFMAC5], (instregex "^FN?M(ADD|SUB)[DS]rrr")>;
def : InstRW<[M1WriteFCVT3], (instregex "^FRINT.+r")>;
def : InstRW<[M1WriteNEONH], (instregex "^FCSEL[DS]rrr")>;
def : InstRW<[M1WriteFVAR15], (instrs FSQRTSr)>;
def : InstRW<[M1WriteFVAR23], (instrs FSQRTDr)>;

// FP miscellaneous instructions.
def : InstRW<[M1WriteFCVT3], (instregex "^FCVT[DS][DS]r")>;
def : InstRW<[M1WriteNEONF], (instregex "^[FSU]CVT[AMNPZ][SU](_Int)?[SU]?[XW]?[DS]?[rds]i?")>;
def : InstRW<[M1WriteNEONE], (instregex "^[SU]CVTF[SU]")>;
def : InstRW<[M1WriteNALU1], (instregex "^FMOV[DS][ir]")>;
// FMOV patterns whose name has [WX] before [DS] use the store-unit write
// (M1WriteS4); the reverse order uses M1WriteNEONI (FP store + load units).
def : InstRW<[M1WriteS4], (instregex "^FMOV[WX][DS](High)?r")>;
def : InstRW<[M1WriteNEONI], (instregex "^FMOV[DS][WX](High)?r")>;

// FP load instructions.
// (No InstRW entries are listed under this heading.)

// FP store instructions.
// (No InstRW entries are listed under this heading.)

// ASIMD instructions.
// Absolute difference, absolute value and negation.
def : InstRW<[M1WriteNMISC3], (instregex "^[SU]ABAL?v")>;
def : InstRW<[M1WriteNMISC1], (instregex "^[SU]ABDL?v")>;
def : InstRW<[M1WriteNMISC1], (instregex "^(SQ)?ABSv")>;
def : InstRW<[M1WriteNMISC1], (instregex "^SQNEGv")>;
// Additions and subtractions, including the halving, long/wide/pairwise,
// narrowing, saturating and rounding name variants.
def : InstRW<[M1WriteNALU1], (instregex "^(ADD|NEG|SUB)v")>;
def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?H(ADD|SUB)v")>;
def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?AD[AD](L|LP|P|W)V?2?v")>;
def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?SUB[LW]2?v")>;
def : InstRW<[M1WriteNMISC3], (instregex "^R?(ADD|SUB)HN?2?v")>;
def : InstRW<[M1WriteNMISC3], (instregex "^[SU]+Q(ADD|SUB)v")>;
def : InstRW<[M1WriteNMISC3], (instregex "^[SU]RHADDv")>;
// Comparisons and bitwise logic.
def : InstRW<[M1WriteNMISC1], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT)v")>;
def : InstRW<[M1WriteNALU1], (instregex "^CMTSTv")>;
def : InstRW<[M1WriteNALU1], (instregex "^(AND|BIC|EOR|MVNI|NOT|ORN|ORR)v")>;
// Minimum/maximum: the plain, P-suffixed and V-suffixed names get latencies
// of 1, 2 and 3 cycles respectively.
def : InstRW<[M1WriteNMISC1], (instregex "^[SU](MIN|MAX)v")>;
def : InstRW<[M1WriteNMISC2], (instregex "^[SU](MIN|MAX)Pv")>;
def : InstRW<[M1WriteNMISC3], (instregex "^[SU](MIN|MAX)Vv")>;
// Multiplications and multiply-accumulates all share the 4-cycle NMISC write.
def : InstRW<[M1WriteNMISC4], (instregex "^(MUL|SQR?DMULH)v")>;
def : InstRW<[M1WriteNMISC4], (instregex "^ML[AS]v")>;
def : InstRW<[M1WriteNMISC4], (instregex "^(S|U|SQD|SQRD)ML[AS][HL]v")>;
def : InstRW<[M1WriteNMISC4], (instregex "^(S|U|SQD)MULLv")>;
// Shifts.
def : InstRW<[M1WriteNAL13], (instregex "^(S|SR|U|UR)SRAv")>;
def : InstRW<[M1WriteNALU1], (instregex "^[SU]?SH(L|LL|R)2?v")>;
def : InstRW<[M1WriteNALU1], (instregex "^S[LR]Iv")>;
// NOTE(review): a name beginning with SSHRv or USHRv satisfies both the
// pattern below and the "SH(L|LL|R)" pattern two entries above, and the two
// entries assign different writes (M1WriteNAL13 vs. M1WriteNALU1). If such
// opcodes exist, confirm which mapping is intended.
def : InstRW<[M1WriteNAL13], (instregex "^[SU]?(Q|QR|R)?SHR(N|U|UN)?2?v")>;
def : InstRW<[M1WriteNAL13], (instregex "^[SU](Q|QR|R)SHLU?v")>;

// ASIMD FP instructions.
// Each entry binds the vector FP opcodes selected by the regular expression
// to one of the M1Write* resources defined earlier.
def : InstRW<[M1WriteNALU1], (instregex "^F(ABS|NEG)v")>;
def : InstRW<[M1WriteNMISC3], (instregex "^F(ABD|ADD|SUB)v")>;
def : InstRW<[M1WriteNEONA], (instregex "^FADDP")>;
// The trailing [^1] rejects names where a '1' follows the 'v'.
def : InstRW<[M1WriteNMISC1], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v[^1]")>;
def : InstRW<[M1WriteFCVT3], (instregex "^[FVSU]CVTX?[AFLMNPZ][SU]?(_Int)?v")>;
// Division and square root: f32 element forms get the 15-cycle write, the
// v2f64 forms the 23-cycle one. The patterns are anchored with '^' like every
// other pattern in this file, so they can only match at the start of an
// opcode name and never in the middle of an unrelated one.
def : InstRW<[M1WriteFVAR15], (instregex "^FDIVv.f32")>;
def : InstRW<[M1WriteFVAR23], (instregex "^FDIVv2f64")>;
def : InstRW<[M1WriteFVAR15], (instregex "^FSQRTv.f32")>;
def : InstRW<[M1WriteFVAR23], (instregex "^FSQRTv2f64")>;
def : InstRW<[M1WriteNMISC1], (instregex "^F(MAX|MIN)(NM)?V?v")>;
def : InstRW<[M1WriteNMISC2], (instregex "^F(MAX|MIN)(NM)?Pv")>;
// Multiplies and multiply-accumulates: names with 'i' after the vector
// arrangement use the two-unit NEONJ/NEONK writes, names with 'f' use the
// plain FMAC writes.
def : InstRW<[M1WriteNEONJ], (instregex "^FMULX?v.i")>;
def : InstRW<[M1WriteFMAC4], (instregex "^FMULX?v.f")>;
def : InstRW<[M1WriteNEONK], (instregex "^FML[AS]v.i")>;
def : InstRW<[M1WriteFMAC5], (instregex "^FML[AS]v.f")>;
def : InstRW<[M1WriteFCVT3], (instregex "^FRINT[AIMNPXZ]v")>;

// ASIMD miscellaneous instructions.
// Bit manipulation, duplication and element moves.
def : InstRW<[M1WriteNALU1], (instregex "^RBITv")>;
def : InstRW<[M1WriteNAL11], (instregex "^(BIF|BIT|BSL)v")>;
def : InstRW<[M1WriteNALU1], (instregex "^CPY")>;
// DUP is split by source operand as encoded in the name: "gpr" forms use the
// two-unit NEONB write, "lane" forms the single-cycle NALU write.
def : InstRW<[M1WriteNEONB], (instregex "^DUPv.+gpr")>;
def : InstRW<[M1WriteNALU1], (instregex "^DUPv.+lane")>;
def : InstRW<[M1WriteNAL13], (instregex "^[SU]?Q?XTU?Nv")>;
def : InstRW<[M1WriteNEONC], (instregex "^INSv.+gpr")>;
// Reciprocal and reciprocal square-root estimate (E), exponent (X) and
// step (S) name variants.
def : InstRW<[M1WriteFCVT4], (instregex "^[FU](RECP|RSQRT)Ev")>;
def : InstRW<[M1WriteNMISC1], (instregex "^[FU](RECP|RSQRT)Xv")>;
def : InstRW<[M1WriteFMAC5], (instregex "^F(RECP|RSQRT)Sv")>;
def : InstRW<[M1WriteNALU1], (instregex "^REV(16|32|64)v")>;
// Table lookups. The v8i8 forms are built from M1WriteNAL11 and the v16i8
// forms from M1WriteNAL12; for the Two/Three/Four names the WriteSequence
// repeat argument equals the count spelled out in the opcode name.
def : InstRW<[M1WriteNAL11], (instregex "^TB[LX]v8i8One")>;
def : InstRW<[WriteSequence<[M1WriteNAL11], 2>],
 (instregex "^TB[LX]v8i8Two")>;
def : InstRW<[WriteSequence<[M1WriteNAL11], 3>],
 (instregex "^TB[LX]v8i8Three")>;
def : InstRW<[WriteSequence<[M1WriteNAL11], 4>],
 (instregex "^TB[LX]v8i8Four")>;
def : InstRW<[M1WriteNAL12], (instregex "^TB[LX]v16i8One")>;
def : InstRW<[WriteSequence<[M1WriteNAL12], 2>],
 (instregex "^TB[LX]v16i8Two")>;
def : InstRW<[WriteSequence<[M1WriteNAL12], 3>],
 (instregex "^TB[LX]v16i8Three")>;
def : InstRW<[WriteSequence<[M1WriteNAL12], 4>],
 (instregex "^TB[LX]v16i8Four")>;
def : InstRW<[M1WriteNEOND], (instregex "^[SU]MOVv")>;
def : InstRW<[M1WriteNALU1], (instregex "^INSv.+lane")>;
// Permutes: TRN/UZP take 1 cycle for the v8i8/v4i16/v2i32 arrangements and
// 2 cycles for the v16i8/v8i16/v4i32/v2i64 ones; ZIP is 1 cycle for all.
def : InstRW<[M1WriteNALU1], (instregex "^(TRN|UZP)[12](v8i8|v4i16|v2i32)")>;
def : InstRW<[M1WriteNALU2], (instregex "^(TRN|UZP)[12](v16i8|v8i16|v4i32|v2i64)")>;
def : InstRW<[M1WriteNALU1], (instregex "^ZIP[12]v")>;

// ASIMD load instructions.
// (No InstRW entries are listed under this heading.)

// ASIMD store instructions.
// (No InstRW entries are listed under this heading.)

// Cryptography instructions.
// AES opcodes get a dedicated write/read pair: the read is advanced by one
// cycle, but only with respect to values produced by M1WriteAES.
let Latency = 1 in
def M1WriteAES : SchedWriteRes<[M1UnitNCRYPT]>;
def M1ReadAES  : SchedReadAdvance<1, [M1WriteAES]>;
def : InstRW<[M1WriteAES, M1ReadAES], (instregex "^AES")>;

// Remaining opcodes on the cryptographic unit.
def : InstRW<[M1WriteNCRYPT1], (instregex "^PMUL")>;
def : InstRW<[M1WriteNCRYPT1], (instregex "^SHA1(H|SU)")>;
def : InstRW<[M1WriteNCRYPT5], (instregex "^SHA1[CMP]")>;
def : InstRW<[M1WriteNCRYPT1], (instregex "^SHA256SU0")>;
def : InstRW<[M1WriteNCRYPT5], (instregex "^SHA256(H|SU1)")>;

// CRC instructions.
def : InstRW<[M1WriteC2], (instregex "^CRC32")>;

} // SchedModel = ExynosM1Model