1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_ 18 #define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_ 19 20 #include "base/macros.h" 21 #include "code_generator_arm_vixl.h" 22 #include "scheduler.h" 23 24 namespace art HIDDEN { 25 namespace arm { 26 // AArch32 instruction latencies. 27 // We currently assume that all ARM CPUs share the same instruction latency list. 28 // The following latencies were tuned based on performance experiments and 29 // automatic tuning using differential evolution approach on various benchmarks. 30 static constexpr uint32_t kArmIntegerOpLatency = 2; 31 static constexpr uint32_t kArmFloatingPointOpLatency = 11; 32 static constexpr uint32_t kArmDataProcWithShifterOpLatency = 4; 33 static constexpr uint32_t kArmMulIntegerLatency = 6; 34 static constexpr uint32_t kArmMulFloatingPointLatency = 11; 35 static constexpr uint32_t kArmDivIntegerLatency = 10; 36 static constexpr uint32_t kArmDivFloatLatency = 20; 37 static constexpr uint32_t kArmDivDoubleLatency = 25; 38 static constexpr uint32_t kArmTypeConversionFloatingPointIntegerLatency = 11; 39 static constexpr uint32_t kArmMemoryLoadLatency = 9; 40 static constexpr uint32_t kArmMemoryStoreLatency = 9; 41 static constexpr uint32_t kArmMemoryBarrierLatency = 6; 42 static constexpr uint32_t kArmBranchLatency = 4; 43 static constexpr uint32_t kArmCallLatency = 5; 44 static constexpr uint32_t kArmCallInternalLatency = 29; 45 static constexpr uint32_t kArmLoadStringInternalLatency = 10; 46 static constexpr uint32_t kArmNopLatency = 2; 47 static constexpr uint32_t kArmLoadWithBakerReadBarrierLatency = 18; 48 static constexpr uint32_t kArmRuntimeTypeCheckLatency = 46; 49 50 class SchedulingLatencyVisitorARM final : public SchedulingLatencyVisitor { 51 public: SchedulingLatencyVisitorARM(CodeGenerator * codegen)52 explicit SchedulingLatencyVisitorARM(CodeGenerator* codegen) 53 : codegen_(down_cast<CodeGeneratorARMVIXL*>(codegen)) {} 54 55 // Default visitor for instructions not handled specifically below. VisitInstruction(HInstruction * ATTRIBUTE_UNUSED)56 void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override { 57 last_visited_latency_ = kArmIntegerOpLatency; 58 } 59 60 // We add a second unused parameter to be able to use this macro like the others 61 // defined in `nodes.h`. 62 #define FOR_EACH_SCHEDULED_ARM_INSTRUCTION(M) \ 63 M(ArrayGet, unused) \ 64 M(ArrayLength, unused) \ 65 M(ArraySet, unused) \ 66 M(Add, unused) \ 67 M(Sub, unused) \ 68 M(And, unused) \ 69 M(Or, unused) \ 70 M(Ror, unused) \ 71 M(Xor, unused) \ 72 M(Shl, unused) \ 73 M(Shr, unused) \ 74 M(UShr, unused) \ 75 M(Mul, unused) \ 76 M(Div, unused) \ 77 M(Condition, unused) \ 78 M(Compare, unused) \ 79 M(BoundsCheck, unused) \ 80 M(PredicatedInstanceFieldGet, unused) \ 81 M(InstanceFieldGet, unused) \ 82 M(InstanceFieldSet, unused) \ 83 M(InstanceOf, unused) \ 84 M(Invoke, unused) \ 85 M(LoadString, unused) \ 86 M(NewArray, unused) \ 87 M(NewInstance, unused) \ 88 M(Rem, unused) \ 89 M(StaticFieldGet, unused) \ 90 M(StaticFieldSet, unused) \ 91 M(SuspendCheck, unused) \ 92 M(TypeConversion, unused) 93 94 #define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \ 95 M(BitwiseNegatedRight, unused) \ 96 M(MultiplyAccumulate, unused) \ 97 M(IntermediateAddress, unused) \ 98 M(IntermediateAddressIndex, unused) \ 99 M(DataProcWithShifterOp, unused) 100 101 #define DECLARE_VISIT_INSTRUCTION(type, unused) \ 102 void Visit##type(H##type* instruction) override; 103 104 FOR_EACH_SCHEDULED_ARM_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) 105 FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) 106 FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION) 107 108 #undef DECLARE_VISIT_INSTRUCTION 109 110 private: 111 bool CanGenerateTest(HCondition* cond); 112 void HandleGenerateConditionWithZero(IfCondition cond); 113 void HandleGenerateLongTestConstant(HCondition* cond); 114 void HandleGenerateLongTest(HCondition* cond); 115 void HandleGenerateLongComparesAndJumps(); 116 void HandleGenerateTest(HCondition* cond); 117 void HandleGenerateConditionGeneric(HCondition* cond); 118 void HandleGenerateEqualLong(HCondition* cond); 119 void HandleGenerateConditionLong(HCondition* cond); 120 void HandleGenerateConditionIntegralOrNonPrimitive(HCondition* cond); 121 void HandleCondition(HCondition* instr); 122 void HandleBinaryOperationLantencies(HBinaryOperation* instr); 123 void HandleBitwiseOperationLantencies(HBinaryOperation* instr); 124 void HandleShiftLatencies(HBinaryOperation* instr); 125 void HandleDivRemConstantIntegralLatencies(int32_t imm); 126 void HandleFieldSetLatencies(HInstruction* instruction, const FieldInfo& field_info); 127 void HandleFieldGetLatencies(HInstruction* instruction, const FieldInfo& field_info); 128 void HandleGenerateDataProcInstruction(bool internal_latency = false); 129 void HandleGenerateDataProc(HDataProcWithShifterOp* instruction); 130 void HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction); 131 132 // The latency setting for each HInstruction depends on how CodeGenerator may generate code, 133 // latency visitors may query CodeGenerator for such information for accurate latency settings. 134 CodeGeneratorARMVIXL* codegen_; 135 }; 136 137 class HSchedulerARM : public HScheduler { 138 public: HSchedulerARM(SchedulingNodeSelector * selector,SchedulingLatencyVisitorARM * arm_latency_visitor)139 HSchedulerARM(SchedulingNodeSelector* selector, 140 SchedulingLatencyVisitorARM* arm_latency_visitor) 141 : HScheduler(arm_latency_visitor, selector) {} ~HSchedulerARM()142 ~HSchedulerARM() override {} 143 IsSchedulable(const HInstruction * instruction)144 bool IsSchedulable(const HInstruction* instruction) const override { 145 #define CASE_INSTRUCTION_KIND(type, unused) case \ 146 HInstruction::InstructionKind::k##type: 147 switch (instruction->GetKind()) { 148 FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND) 149 return true; 150 FOR_EACH_CONCRETE_INSTRUCTION_ARM(CASE_INSTRUCTION_KIND) 151 return true; 152 default: 153 return HScheduler::IsSchedulable(instruction); 154 } 155 #undef CASE_INSTRUCTION_KIND 156 } 157 158 private: 159 DISALLOW_COPY_AND_ASSIGN(HSchedulerARM); 160 }; 161 162 } // namespace arm 163 } // namespace art 164 165 #endif // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_ 166