/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Class used to build a model through a succession of calls to the NN API.

#ifndef ANDROID_FRAMEWORKS_ML_NN_RUNTIME_MODEL_BUILDER_H
#define ANDROID_FRAMEWORKS_ML_NN_RUNTIME_MODEL_BUILDER_H

#include <LegacyUtils.h>

#include <memory>
#include <vector>

#include "Memory.h"
#include "NeuralNetworks.h"

namespace android {
namespace nn {

class CompilationBuilder;
class Device;
class ExecutionPlan;
class RuntimeMemory;

class ModelBuilder {
   public:
    ModelBuilder() {}
    // Returns an operand/operation type corresponding to a given extension
    // operand/operation type.
    int getExtensionType(const char* extensionName, uint16_t typeWithinExtension, int32_t* type);
    // Adds an operand to the model.
    int addOperand(const ANeuralNetworksOperandType& type);
    int setOperandValue(uint32_t index, const void* buffer, size_t length);
    int setOperandValueFromMemory(uint32_t index, const RuntimeMemory* memory, uint32_t offset,
                                  size_t length);
    int setOperandValueFromModel(uint32_t index, const ModelBuilder* value);
    int setOperandSymmPerChannelQuantParams(
            uint32_t index, const ANeuralNetworksSymmPerChannelQuantParams& extraParams);
    int setOperandExtensionData(uint32_t index, const void* data, size_t length);

    int addOperation(ANeuralNetworksOperationType type, uint32_t inputCount,
                     const uint32_t* inputs, uint32_t outputCount, const uint32_t* outputs);
    int identifyInputsAndOutputs(uint32_t inputCount, const uint32_t* inputs,
                                 uint32_t outputCount, const uint32_t* outputs);
    int relaxComputationFloat32toFloat16(bool allow);
    bool isComputationFloat32RelaxedToFloat16() const {
        return mRelaxComputationFloat32toFloat16;
    }

    int finish();
    bool isFinished() const { return mCompletedModel; }
    bool isValid() const { return !mInvalidModel; }

    bool hasOEMOperation() const { return mHasOEMOperation; }
    bool hasExtensionOperation() const { return mHasExtensionOperation; }
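
    // Illustrative build sequence (a sketch, not part of this header's contract;
    // error-code checks are omitted, and the operand layout below is hypothetical).
    // A client adds operands and operations, identifies the model's inputs and
    // outputs, then calls finish(), after which the model is immutable:
    //
    //   ModelBuilder builder;
    //   ANeuralNetworksOperandType tensorType = {/* ANEURALNETWORKS_TENSOR_FLOAT32, dims... */};
    //   ANeuralNetworksOperandType scalarType = {/* ANEURALNETWORKS_INT32 */};
    //   builder.addOperand(tensorType);  // operand 0: model input
    //   builder.addOperand(tensorType);  // operand 1: constant addend
    //   builder.addOperand(scalarType);  // operand 2: fused activation code
    //   builder.addOperand(tensorType);  // operand 3: model output
    //   builder.setOperandValue(1, addendData, addendLength);
    //   int32_t fuseCode = ANEURALNETWORKS_FUSED_NONE;
    //   builder.setOperandValue(2, &fuseCode, sizeof(fuseCode));
    //   uint32_t opInputs[] = {0, 1, 2}, opOutputs[] = {3};
    //   builder.addOperation(ANEURALNETWORKS_ADD, 3, opInputs, 1, opOutputs);
    //   uint32_t modelInputs[] = {0}, modelOutputs[] = {3};
    //   builder.identifyInputsAndOutputs(1, modelInputs, 1, modelOutputs);
    //   builder.finish();  // no further modifications are allowed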

    // explicitDeviceList is true if the list of devices was provided explicitly
    // via the ANeuralNetworksModel_createForDevices API (which has certain
    // special semantics) and false otherwise.
    int createCompilation(CompilationBuilder** compilation,
                          const std::vector<std::shared_ptr<Device>>& devices,
                          bool explicitDeviceList = false);

    Model makeModel() const;

    uint32_t operandCount() const {
        // We don't allow more than uint32_t worth of operands
        return static_cast<uint32_t>(mOperands.size());
    }
    uint32_t operationCount() const {
        // We don't allow more than uint32_t worth of operations
        return static_cast<uint32_t>(mOperations.size());
    }
    uint32_t inputCount() const { return static_cast<uint32_t>(mInputIndexes.size()); }
    uint32_t outputCount() const { return static_cast<uint32_t>(mOutputIndexes.size()); }
    uint32_t getInputOperandIndex(uint32_t i) const {
        CHECK_LT(i, mInputIndexes.size());
        return mInputIndexes[i];
    }
    const std::vector<uint32_t>& getInputOperandIndexes() const { return mInputIndexes; }
    const Operand& getInputOperand(uint32_t i) const {
        uint32_t index = getInputOperandIndex(i);
        CHECK_LT(index, mOperands.size());
        return mOperands[index];
    }
    uint32_t getOutputOperandIndex(uint32_t i) const {
        CHECK_LT(i, mOutputIndexes.size());
        return mOutputIndexes[i];
    }
    const std::vector<uint32_t>& getOutputOperandIndexes() const { return mOutputIndexes; }
    const Operand& getOutputOperand(uint32_t i) const {
        uint32_t index = getOutputOperandIndex(i);
        CHECK_LT(index, mOperands.size());
        return mOperands[index];
    }
    const Operand& getOperand(uint32_t index) const { return mOperands[index]; }
    const Operation& getOperation(uint32_t index) const { return mOperations[index]; }
    const MemoryTracker& getMemories() const { return mMemories; }
    const std::vector<Operation>& getOperations() const { return mOperations; }
    const std::vector<uint32_t>& getSortedOperationMapping() const {
        return mSortedOperationIndexMap;
    }
    const uint8_t* getPointerToOperandValue(uint32_t offset) const {
        return mSmallOperandValues.data() + offset;
    }
    uint32_t referencedModelCount() const {
        return static_cast<uint32_t>(mReferencedModels.size());
    }
    const ModelBuilder* getReferencedModel(uint32_t i) const {
        CHECK_LT(i, mReferencedModels.size());
        return mReferencedModels[i];
    }
    const ModelBuilder* getReferencedModel(const Operand& operand) const {
        CHECK(operand.lifetime == Operand::LifeTime::SUBGRAPH);
        return getReferencedModel(operand.location.offset);
    }

    // simulateFailureResultCode == ANEURALNETWORKS_NO_ERROR means behave normally.
    int partitionTheWork(const std::vector<std::shared_ptr<Device>>& devices, uint32_t preference,
                         uint32_t priority, const OptionalTimePoint& deadline, ExecutionPlan* plan,
                         int simulateFailureResultCode = ANEURALNETWORKS_NO_ERROR) const;
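
    // Note on the getReferencedModel(const Operand&) overload above (a reading
    // of the code, not additional contract): for an operand with lifetime
    // SUBGRAPH, location.offset is reused as an index into mReferencedModels
    // rather than as a byte offset. A hypothetical caller:
    //
    //   const Operand& operand = builder.getOperand(i);
    //   if (operand.lifetime == Operand::LifeTime::SUBGRAPH) {
    //       const ModelBuilder* referenced = builder.getReferencedModel(operand);
    //       // equivalent to builder.getReferencedModel(operand.location.offset)
    //   }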

   private:
    // TODO(b/132322449): move partitionTheWork, findBestDeviceForEachOperation,
    // getPerformance, supportedByControlFlowInterpreter,
    // isControlFlowOperationWithOperandOfUnknownSize, partitionTheWorkInternal,
    // sortIntoRunOrder to CompilationBuilder?

    // Populates bestDeviceForOperation.
    //
    // For 0 <= i < operationCount(), produces
    //
    //     0 <= (*bestDeviceForOperation)[i] <= devices.size()
    //
    // (*bestDeviceForOperation)[i] == devices.size() is a special value meaning
    // that this is a control flow operation scheduled for interpreted execution
    // (see LogicalStep).
    int findBestDeviceForEachOperation(uint32_t preference,
                                       const std::vector<std::shared_ptr<Device>>& devices,
                                       std::vector<int>* bestDeviceForOperation) const;
    float getPerformance(uint32_t preference, const std::shared_ptr<Device> device) const;
    float getPerformance(uint32_t preference, const std::shared_ptr<Device> device,
                         uint32_t operationIndex) const;
    bool supportedByControlFlowInterpreter(uint32_t operationIndex) const;

    // Returns true if the operation is IF or WHILE and has an inner or outer
    // input or output of unknown size.
    bool isControlFlowOperationWithOperandOfUnknownSize(uint32_t operationIndex) const;

    int partitionTheWorkInternal(uint32_t sourceModelIndex,
                                 const std::vector<std::shared_ptr<Device>>& devices,
                                 uint32_t preference, uint32_t priority,
                                 const OptionalTimePoint& deadline, ExecutionPlan* plan) const;

    // Returns true if either mCompletedModel or mInvalidModel is true.
    bool badState(const char* name);

    // Removes some trailing operation inputs that are set to default values.
    //
    // Some drivers reject operations based on the argument count even when the
    // optional arguments are set to default values. This transformation enables
    // more drivers to execute the model. See http://b/147105700.
    void removeTrailingArgumentsWithDefaultValues();
    uint32_t getNumTrailingArgumentsToRemove(const Operation& operation) const;

    // Sorts the operations to be in the correct order for single-threaded
    // node-at-a-time execution.
    bool sortIntoRunOrder();

    // Copies the large values to shared memory, if we have any.
    int copyLargeValuesToSharedMemory();

    // The operations of the graph.
    std::vector<Operation> mOperations;
    // The mapping from sorted index to the original index of operations in mOperations.
    // mSortedOperationIndexMap is empty before sortIntoRunOrder() is called.
    std::vector<uint32_t> mSortedOperationIndexMap;
    // Is at least one of those operations an OEM_OPERATION?
    bool mHasOEMOperation = false;
    // Is at least one of those operations an extension operation?
    bool mHasExtensionOperation = false;
    // The description of the operands of the graph.
    std::vector<Operand> mOperands;
    // Is at least one of those operands an OEM operand?
    bool mHasOEMOperand = false;
    // The indexes of input operands of the model.
    std::vector<uint32_t> mInputIndexes;
    // The indexes of output operands of the model.
    std::vector<uint32_t> mOutputIndexes;

    MemoryTracker mMemories;

    // The values of the small operands that are defined at model
    // creation time.
    std::vector<uint8_t> mSmallOperandValues;

    struct LargeValue {
        uint32_t operandIndex;
        const void* buffer;
    };
    // Operand index and buffer pointer for all the large operand values of this model.
    std::vector<LargeValue> mLargeOperandValues;
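
    // Sketch of how constant operand values are split between the two stores
    // (the exact size threshold lives in the implementation; the constant named
    // below is an assumption based on NeuralNetworks.h):
    //
    //   // A value no larger than ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES
    //   // is copied into mSmallOperandValues and later addressed by offset via
    //   // getPointerToOperandValue().
    //   builder.setOperandValue(k, smallBuf, 8);
    //   // A larger value is only recorded in mLargeOperandValues; its bytes are
    //   // copied into mLargeValueMemory by copyLargeValuesToSharedMemory() when
    //   // the model is finished.
    //   builder.setOperandValue(m, bigBuf, 1 << 20);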

    // The shared memory region that will contain the large values.
    std::unique_ptr<MemoryAshmem> mLargeValueMemory;

    // Once the model has been finished, we should not allow further
    // modifications to the model.
    bool mCompletedModel = false;

    // Any invalid manipulation of the model will mark the model invalid.
    // No further modifications are allowed to the model.
    bool mInvalidModel = false;

    // 'true' indicates TENSOR_FLOAT32 may be calculated with range and/or
    // precision as low as that of the IEEE 754 16-bit floating-point format.
    // 'false' indicates TENSOR_FLOAT32 must be calculated using at least the
    // range and precision of the IEEE 754 32-bit floating-point format.
    bool mRelaxComputationFloat32toFloat16 = false;

    // Models referenced by operands in this model.
    std::vector<const ModelBuilder*> mReferencedModels;

    // Main subgraphs of models referenced by operands in this model. Required
    // for validateOperation().
    std::vector<Model::Subgraph> mReferencedSubgraphsForValidation;

    class ModelMaker;
};

}  // namespace nn
}  // namespace android

#endif  // ANDROID_FRAMEWORKS_ML_NN_RUNTIME_MODEL_BUILDER_H