/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_
#define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_

#include "base/macros.h"
#include "scheduler.h"

namespace art HIDDEN {
namespace arm64 {

static constexpr uint32_t kArm64MemoryLoadLatency = 5;
static constexpr uint32_t kArm64MemoryStoreLatency = 3;

static constexpr uint32_t kArm64CallInternalLatency = 10;
static constexpr uint32_t kArm64CallLatency = 5;

// AArch64 instruction latency.
// We currently assume that all arm64 CPUs share the same instruction latency list.
static constexpr uint32_t kArm64IntegerOpLatency = 2;
static constexpr uint32_t kArm64FloatingPointOpLatency = 5;

static constexpr uint32_t kArm64DataProcWithShifterOpLatency = 3;
static constexpr uint32_t kArm64DivDoubleLatency = 30;
static constexpr uint32_t kArm64DivFloatLatency = 15;
static constexpr uint32_t kArm64DivIntegerLatency = 5;
static constexpr uint32_t kArm64LoadStringInternalLatency = 7;
static constexpr uint32_t kArm64MulFloatingPointLatency = 6;
static constexpr uint32_t kArm64MulIntegerLatency = 6;
static constexpr uint32_t kArm64TypeConversionFloatingPointIntegerLatency = 5;
static constexpr uint32_t kArm64BranchLatency = kArm64IntegerOpLatency;

static constexpr uint32_t kArm64SIMDFloatingPointOpLatency = 10;
static constexpr uint32_t kArm64SIMDIntegerOpLatency = 6;
static constexpr uint32_t kArm64SIMDMemoryLoadLatency = 10;
static constexpr uint32_t kArm64SIMDMemoryStoreLatency = 6;
static constexpr uint32_t kArm64SIMDMulFloatingPointLatency = 12;
static constexpr uint32_t kArm64SIMDMulIntegerLatency = 12;
static constexpr uint32_t kArm64SIMDReplicateOpLatency = 16;
static constexpr uint32_t kArm64SIMDDivDoubleLatency = 60;
static constexpr uint32_t kArm64SIMDDivFloatLatency = 30;
static constexpr uint32_t kArm64SIMDTypeConversionInt2FPLatency = 10;

class SchedulingLatencyVisitorARM64 final : public SchedulingLatencyVisitor {
 public:
  // Default visitor for instructions not handled specifically below.
  void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override {
    last_visited_latency_ = kArm64IntegerOpLatency;
  }

  // We add a second unused parameter to be able to use this macro like the others
  // defined in `nodes.h`.
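  // To illustrate how these lists are consumed: via DECLARE_VISIT_INSTRUCTION below, an entry
  // such as `M(ArrayGet, unused)` expands to the declaration
  //
  //   void VisitArrayGet(HArrayGet* instruction) override;
  //
  // whose definition (in scheduler_arm64.cc) records the estimated latency for that instruction
  // kind in `last_visited_latency_`.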
#define FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(M) \
  M(ArrayGet             , unused)   \
  M(ArrayLength          , unused)   \
  M(ArraySet             , unused)   \
  M(BoundsCheck          , unused)   \
  M(Div                  , unused)   \
  M(InstanceFieldGet     , unused)   \
  M(InstanceOf           , unused)   \
  M(LoadString           , unused)   \
  M(Mul                  , unused)   \
  M(NewArray             , unused)   \
  M(NewInstance          , unused)   \
  M(Rem                  , unused)   \
  M(StaticFieldGet       , unused)   \
  M(SuspendCheck         , unused)   \
  M(TypeConversion       , unused)   \
  M(VecReplicateScalar   , unused)   \
  M(VecExtractScalar     , unused)   \
  M(VecReduce            , unused)   \
  M(VecCnv               , unused)   \
  M(VecNeg               , unused)   \
  M(VecAbs               , unused)   \
  M(VecNot               , unused)   \
  M(VecAdd               , unused)   \
  M(VecHalvingAdd        , unused)   \
  M(VecSub               , unused)   \
  M(VecMul               , unused)   \
  M(VecDiv               , unused)   \
  M(VecMin               , unused)   \
  M(VecMax               , unused)   \
  M(VecAnd               , unused)   \
  M(VecAndNot            , unused)   \
  M(VecOr                , unused)   \
  M(VecXor               , unused)   \
  M(VecShl               , unused)   \
  M(VecShr               , unused)   \
  M(VecUShr              , unused)   \
  M(VecSetScalars        , unused)   \
  M(VecMultiplyAccumulate, unused)   \
  M(VecLoad              , unused)   \
  M(VecStore             , unused)

#define FOR_EACH_SCHEDULED_ABSTRACT_INSTRUCTION(M) \
  M(BinaryOperation , unused)   \
  M(Invoke          , unused)

#define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
  M(BitwiseNegatedRight, unused)       \
  M(MultiplyAccumulate, unused)        \
  M(IntermediateAddress, unused)       \
  M(IntermediateAddressIndex, unused)  \
  M(DataProcWithShifterOp, unused)

#define DECLARE_VISIT_INSTRUCTION(type, unused) \
  void Visit##type(H##type* instruction) override;

  FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_SCHEDULED_ABSTRACT_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

 private:
  void HandleSimpleArithmeticSIMD(HVecOperation* instr);
  void HandleVecAddress(HVecMemoryOperation* instruction, size_t size);
};

class HSchedulerARM64 : public HScheduler {
 public:
  explicit HSchedulerARM64(SchedulingNodeSelector* selector)
      : HScheduler(&arm64_latency_visitor_, selector) {}
  ~HSchedulerARM64() override {}

  bool IsSchedulable(const HInstruction* instruction) const override {
#define CASE_INSTRUCTION_KIND(type, unused) case \
    HInstruction::InstructionKind::k##type:
    switch (instruction->GetKind()) {
      FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND)
        return true;
      FOR_EACH_CONCRETE_INSTRUCTION_ARM64(CASE_INSTRUCTION_KIND)
        return true;
      FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(CASE_INSTRUCTION_KIND)
        return true;
      default:
        return HScheduler::IsSchedulable(instruction);
    }
#undef CASE_INSTRUCTION_KIND
  }
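
  // Illustrative usage (a sketch, not the actual pass wiring; `selector` and `graph` stand for a
  // SchedulingNodeSelector* and an HGraph* supplied by the caller, and Schedule() is inherited
  // from HScheduler, declared in scheduler.h):
  //
  //   arm64::HSchedulerARM64 scheduler(selector);
  //   scheduler.Schedule(graph);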

  // Treat as scheduling barriers those vector instructions whose live ranges exceed the
  // vectorized loop boundaries. This is a workaround for the lack of a notion of SIMD registers
  // in the compiler: around a call we have to save/restore all live SIMD&FP registers (only the
  // lower 64 bits of SIMD&FP registers are callee-saved), so such vector instructions must not
  // be reordered.
  //
  // TODO: Remove this when proper support for SIMD registers is introduced in the compiler.
  bool IsSchedulingBarrier(const HInstruction* instr) const override {
    return HScheduler::IsSchedulingBarrier(instr) ||
           instr->IsVecReduce() ||
           instr->IsVecExtractScalar() ||
           instr->IsVecSetScalars() ||
           instr->IsVecReplicateScalar();
  }

 private:
  SchedulingLatencyVisitorARM64 arm64_latency_visitor_;
  DISALLOW_COPY_AND_ASSIGN(HSchedulerARM64);
};

}  // namespace arm64
}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_