1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 // Provides C++ classes to more easily use the Neural Networks API. 18 19 #ifndef ANDROID_ML_NN_RUNTIME_NEURAL_NETWORKS_WRAPPER_H 20 #define ANDROID_ML_NN_RUNTIME_NEURAL_NETWORKS_WRAPPER_H 21 22 #include "NeuralNetworks.h" 23 24 #include <math.h> 25 #include <optional> 26 #include <string> 27 #include <vector> 28 29 namespace android { 30 namespace nn { 31 namespace wrapper { 32 33 enum class Type { 34 FLOAT32 = ANEURALNETWORKS_FLOAT32, 35 INT32 = ANEURALNETWORKS_INT32, 36 UINT32 = ANEURALNETWORKS_UINT32, 37 TENSOR_FLOAT32 = ANEURALNETWORKS_TENSOR_FLOAT32, 38 TENSOR_INT32 = ANEURALNETWORKS_TENSOR_INT32, 39 TENSOR_QUANT8_ASYMM = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, 40 BOOL = ANEURALNETWORKS_BOOL, 41 TENSOR_QUANT16_SYMM = ANEURALNETWORKS_TENSOR_QUANT16_SYMM, 42 TENSOR_FLOAT16 = ANEURALNETWORKS_TENSOR_FLOAT16, 43 TENSOR_BOOL8 = ANEURALNETWORKS_TENSOR_BOOL8, 44 FLOAT16 = ANEURALNETWORKS_FLOAT16, 45 TENSOR_QUANT8_SYMM_PER_CHANNEL = ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL, 46 TENSOR_QUANT16_ASYMM = ANEURALNETWORKS_TENSOR_QUANT16_ASYMM, 47 TENSOR_QUANT8_SYMM = ANEURALNETWORKS_TENSOR_QUANT8_SYMM, 48 }; 49 50 enum class ExecutePreference { 51 PREFER_LOW_POWER = ANEURALNETWORKS_PREFER_LOW_POWER, 52 PREFER_FAST_SINGLE_ANSWER = ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER, 53 PREFER_SUSTAINED_SPEED = ANEURALNETWORKS_PREFER_SUSTAINED_SPEED 54 }; 55 56 enum class Result { 57 NO_ERROR = ANEURALNETWORKS_NO_ERROR, 58 OUT_OF_MEMORY = ANEURALNETWORKS_OUT_OF_MEMORY, 59 INCOMPLETE = ANEURALNETWORKS_INCOMPLETE, 60 UNEXPECTED_NULL = ANEURALNETWORKS_UNEXPECTED_NULL, 61 BAD_DATA = ANEURALNETWORKS_BAD_DATA, 62 OP_FAILED = ANEURALNETWORKS_OP_FAILED, 63 UNMAPPABLE = ANEURALNETWORKS_UNMAPPABLE, 64 BAD_STATE = ANEURALNETWORKS_BAD_STATE, 65 OUTPUT_INSUFFICIENT_SIZE = ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE, 66 UNAVAILABLE_DEVICE = ANEURALNETWORKS_UNAVAILABLE_DEVICE, 67 }; 68 69 struct SymmPerChannelQuantParams { 70 ANeuralNetworksSymmPerChannelQuantParams params; 71 std::vector<float> scales; 72 SymmPerChannelQuantParamsSymmPerChannelQuantParams73 SymmPerChannelQuantParams(std::vector<float> scalesVec, uint32_t channelDim) 74 : scales(std::move(scalesVec)) { 75 params = { 76 .channelDim = channelDim, 77 .scaleCount = static_cast<uint32_t>(scales.size()), 78 .scales = scales.size() > 0 ? scales.data() : nullptr, 79 }; 80 } 81 SymmPerChannelQuantParamsSymmPerChannelQuantParams82 SymmPerChannelQuantParams(const SymmPerChannelQuantParams& other) 83 : params(other.params), scales(other.scales) { 84 params.scales = scales.size() > 0 ? scales.data() : nullptr; 85 } 86 87 SymmPerChannelQuantParams& operator=(const SymmPerChannelQuantParams& other) { 88 if (this != &other) { 89 params = other.params; 90 scales = other.scales; 91 params.scales = scales.size() > 0 ? scales.data() : nullptr; 92 } 93 return *this; 94 } 95 }; 96 97 struct OperandType { 98 ANeuralNetworksOperandType operandType; 99 std::vector<uint32_t> dimensions; 100 std::optional<SymmPerChannelQuantParams> channelQuant; 101 OperandTypeOperandType102 OperandType(const OperandType& other) 103 : operandType(other.operandType), 104 dimensions(other.dimensions), 105 channelQuant(other.channelQuant) { 106 operandType.dimensions = dimensions.size() > 0 ? dimensions.data() : nullptr; 107 } 108 109 OperandType& operator=(const OperandType& other) { 110 if (this != &other) { 111 operandType = other.operandType; 112 dimensions = other.dimensions; 113 channelQuant = other.channelQuant; 114 operandType.dimensions = dimensions.size() > 0 ? dimensions.data() : nullptr; 115 } 116 return *this; 117 } 118 119 OperandType(Type type, std::vector<uint32_t> d, float scale = 0.0f, int32_t zeroPoint = 0) dimensionsOperandType120 : dimensions(std::move(d)), channelQuant(std::nullopt) { 121 operandType = { 122 .type = static_cast<int32_t>(type), 123 .dimensionCount = static_cast<uint32_t>(dimensions.size()), 124 .dimensions = dimensions.size() > 0 ? dimensions.data() : nullptr, 125 .scale = scale, 126 .zeroPoint = zeroPoint, 127 }; 128 } 129 OperandTypeOperandType130 OperandType(Type type, std::vector<uint32_t> data, float scale, int32_t zeroPoint, 131 SymmPerChannelQuantParams&& channelQuant) 132 : dimensions(std::move(data)), channelQuant(std::move(channelQuant)) { 133 operandType = { 134 .type = static_cast<int32_t>(type), 135 .dimensionCount = static_cast<uint32_t>(dimensions.size()), 136 .dimensions = dimensions.size() > 0 ? dimensions.data() : nullptr, 137 .scale = scale, 138 .zeroPoint = zeroPoint, 139 }; 140 } 141 }; 142 143 class Memory { 144 public: Memory(size_t size,int protect,int fd,size_t offset)145 Memory(size_t size, int protect, int fd, size_t offset) { 146 mValid = ANeuralNetworksMemory_createFromFd(size, protect, fd, offset, &mMemory) == 147 ANEURALNETWORKS_NO_ERROR; 148 } 149 Memory(AHardwareBuffer * buffer)150 Memory(AHardwareBuffer* buffer) { 151 mValid = ANeuralNetworksMemory_createFromAHardwareBuffer(buffer, &mMemory) == 152 ANEURALNETWORKS_NO_ERROR; 153 } 154 ~Memory()155 ~Memory() { ANeuralNetworksMemory_free(mMemory); } 156 157 // Disallow copy semantics to ensure the runtime object can only be freed 158 // once. Copy semantics could be enabled if some sort of reference counting 159 // or deep-copy system for runtime objects is added later. 160 Memory(const Memory&) = delete; 161 Memory& operator=(const Memory&) = delete; 162 163 // Move semantics to remove access to the runtime object from the wrapper 164 // object that is being moved. This ensures the runtime object will be 165 // freed only once. Memory(Memory && other)166 Memory(Memory&& other) { *this = std::move(other); } 167 Memory& operator=(Memory&& other) { 168 if (this != &other) { 169 ANeuralNetworksMemory_free(mMemory); 170 mMemory = other.mMemory; 171 mValid = other.mValid; 172 other.mMemory = nullptr; 173 other.mValid = false; 174 } 175 return *this; 176 } 177 get()178 ANeuralNetworksMemory* get() const { return mMemory; } isValid()179 bool isValid() const { return mValid; } 180 181 private: 182 ANeuralNetworksMemory* mMemory = nullptr; 183 bool mValid = true; 184 }; 185 186 class Model { 187 public: Model()188 Model() { 189 // TODO handle the value returned by this call 190 ANeuralNetworksModel_create(&mModel); 191 } ~Model()192 ~Model() { ANeuralNetworksModel_free(mModel); } 193 194 // Disallow copy semantics to ensure the runtime object can only be freed 195 // once. Copy semantics could be enabled if some sort of reference counting 196 // or deep-copy system for runtime objects is added later. 197 Model(const Model&) = delete; 198 Model& operator=(const Model&) = delete; 199 200 // Move semantics to remove access to the runtime object from the wrapper 201 // object that is being moved. This ensures the runtime object will be 202 // freed only once. Model(Model && other)203 Model(Model&& other) { *this = std::move(other); } 204 Model& operator=(Model&& other) { 205 if (this != &other) { 206 ANeuralNetworksModel_free(mModel); 207 mModel = other.mModel; 208 mNextOperandId = other.mNextOperandId; 209 mValid = other.mValid; 210 other.mModel = nullptr; 211 other.mNextOperandId = 0; 212 other.mValid = false; 213 } 214 return *this; 215 } 216 finish()217 Result finish() { 218 if (mValid) { 219 auto result = static_cast<Result>(ANeuralNetworksModel_finish(mModel)); 220 if (result != Result::NO_ERROR) { 221 mValid = false; 222 } 223 return result; 224 } else { 225 return Result::BAD_STATE; 226 } 227 } 228 addOperand(const OperandType * type)229 uint32_t addOperand(const OperandType* type) { 230 if (ANeuralNetworksModel_addOperand(mModel, &(type->operandType)) != 231 ANEURALNETWORKS_NO_ERROR) { 232 mValid = false; 233 } 234 if (type->channelQuant) { 235 if (ANeuralNetworksModel_setOperandSymmPerChannelQuantParams( 236 mModel, mNextOperandId, &type->channelQuant.value().params) != 237 ANEURALNETWORKS_NO_ERROR) { 238 mValid = false; 239 } 240 } 241 return mNextOperandId++; 242 } 243 setOperandValue(uint32_t index,const void * buffer,size_t length)244 void setOperandValue(uint32_t index, const void* buffer, size_t length) { 245 if (ANeuralNetworksModel_setOperandValue(mModel, index, buffer, length) != 246 ANEURALNETWORKS_NO_ERROR) { 247 mValid = false; 248 } 249 } 250 setOperandValueFromMemory(uint32_t index,const Memory * memory,uint32_t offset,size_t length)251 void setOperandValueFromMemory(uint32_t index, const Memory* memory, uint32_t offset, 252 size_t length) { 253 if (ANeuralNetworksModel_setOperandValueFromMemory(mModel, index, memory->get(), offset, 254 length) != ANEURALNETWORKS_NO_ERROR) { 255 mValid = false; 256 } 257 } 258 addOperation(ANeuralNetworksOperationType type,const std::vector<uint32_t> & inputs,const std::vector<uint32_t> & outputs)259 void addOperation(ANeuralNetworksOperationType type, const std::vector<uint32_t>& inputs, 260 const std::vector<uint32_t>& outputs) { 261 if (ANeuralNetworksModel_addOperation(mModel, type, static_cast<uint32_t>(inputs.size()), 262 inputs.data(), static_cast<uint32_t>(outputs.size()), 263 outputs.data()) != ANEURALNETWORKS_NO_ERROR) { 264 mValid = false; 265 } 266 } identifyInputsAndOutputs(const std::vector<uint32_t> & inputs,const std::vector<uint32_t> & outputs)267 void identifyInputsAndOutputs(const std::vector<uint32_t>& inputs, 268 const std::vector<uint32_t>& outputs) { 269 if (ANeuralNetworksModel_identifyInputsAndOutputs( 270 mModel, static_cast<uint32_t>(inputs.size()), inputs.data(), 271 static_cast<uint32_t>(outputs.size()), 272 outputs.data()) != ANEURALNETWORKS_NO_ERROR) { 273 mValid = false; 274 } 275 } 276 relaxComputationFloat32toFloat16(bool isRelax)277 void relaxComputationFloat32toFloat16(bool isRelax) { 278 if (ANeuralNetworksModel_relaxComputationFloat32toFloat16(mModel, isRelax) == 279 ANEURALNETWORKS_NO_ERROR) { 280 mRelaxed = isRelax; 281 } 282 } 283 getHandle()284 ANeuralNetworksModel* getHandle() const { return mModel; } isValid()285 bool isValid() const { return mValid; } isRelaxed()286 bool isRelaxed() const { return mRelaxed; } 287 288 protected: 289 ANeuralNetworksModel* mModel = nullptr; 290 // We keep track of the operand ID as a convenience to the caller. 291 uint32_t mNextOperandId = 0; 292 bool mValid = true; 293 bool mRelaxed = false; 294 }; 295 296 class Event { 297 public: Event()298 Event() {} ~Event()299 ~Event() { ANeuralNetworksEvent_free(mEvent); } 300 301 // Disallow copy semantics to ensure the runtime object can only be freed 302 // once. Copy semantics could be enabled if some sort of reference counting 303 // or deep-copy system for runtime objects is added later. 304 Event(const Event&) = delete; 305 Event& operator=(const Event&) = delete; 306 307 // Move semantics to remove access to the runtime object from the wrapper 308 // object that is being moved. This ensures the runtime object will be 309 // freed only once. Event(Event && other)310 Event(Event&& other) { *this = std::move(other); } 311 Event& operator=(Event&& other) { 312 if (this != &other) { 313 ANeuralNetworksEvent_free(mEvent); 314 mEvent = other.mEvent; 315 other.mEvent = nullptr; 316 } 317 return *this; 318 } 319 wait()320 Result wait() { return static_cast<Result>(ANeuralNetworksEvent_wait(mEvent)); } 321 322 // Only for use by Execution set(ANeuralNetworksEvent * newEvent)323 void set(ANeuralNetworksEvent* newEvent) { 324 ANeuralNetworksEvent_free(mEvent); 325 mEvent = newEvent; 326 } 327 328 private: 329 ANeuralNetworksEvent* mEvent = nullptr; 330 }; 331 332 class Compilation { 333 public: Compilation(const Model * model)334 Compilation(const Model* model) { 335 int result = ANeuralNetworksCompilation_create(model->getHandle(), &mCompilation); 336 if (result != 0) { 337 // TODO Handle the error 338 } 339 } 340 ~Compilation()341 ~Compilation() { ANeuralNetworksCompilation_free(mCompilation); } 342 343 // Disallow copy semantics to ensure the runtime object can only be freed 344 // once. Copy semantics could be enabled if some sort of reference counting 345 // or deep-copy system for runtime objects is added later. 346 Compilation(const Compilation&) = delete; 347 Compilation& operator=(const Compilation&) = delete; 348 349 // Move semantics to remove access to the runtime object from the wrapper 350 // object that is being moved. This ensures the runtime object will be 351 // freed only once. Compilation(Compilation && other)352 Compilation(Compilation&& other) { *this = std::move(other); } 353 Compilation& operator=(Compilation&& other) { 354 if (this != &other) { 355 ANeuralNetworksCompilation_free(mCompilation); 356 mCompilation = other.mCompilation; 357 other.mCompilation = nullptr; 358 } 359 return *this; 360 } 361 setPreference(ExecutePreference preference)362 Result setPreference(ExecutePreference preference) { 363 return static_cast<Result>(ANeuralNetworksCompilation_setPreference( 364 mCompilation, static_cast<int32_t>(preference))); 365 } 366 setCaching(const std::string & cacheDir,const std::vector<uint8_t> & token)367 Result setCaching(const std::string& cacheDir, const std::vector<uint8_t>& token) { 368 if (token.size() != ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN) { 369 return Result::BAD_DATA; 370 } 371 return static_cast<Result>(ANeuralNetworksCompilation_setCaching( 372 mCompilation, cacheDir.c_str(), token.data())); 373 } 374 finish()375 Result finish() { return static_cast<Result>(ANeuralNetworksCompilation_finish(mCompilation)); } 376 getHandle()377 ANeuralNetworksCompilation* getHandle() const { return mCompilation; } 378 379 private: 380 ANeuralNetworksCompilation* mCompilation = nullptr; 381 }; 382 383 class Execution { 384 public: Execution(const Compilation * compilation)385 Execution(const Compilation* compilation) { 386 int result = ANeuralNetworksExecution_create(compilation->getHandle(), &mExecution); 387 if (result != 0) { 388 // TODO Handle the error 389 } 390 } 391 ~Execution()392 ~Execution() { ANeuralNetworksExecution_free(mExecution); } 393 394 // Disallow copy semantics to ensure the runtime object can only be freed 395 // once. Copy semantics could be enabled if some sort of reference counting 396 // or deep-copy system for runtime objects is added later. 397 Execution(const Execution&) = delete; 398 Execution& operator=(const Execution&) = delete; 399 400 // Move semantics to remove access to the runtime object from the wrapper 401 // object that is being moved. This ensures the runtime object will be 402 // freed only once. Execution(Execution && other)403 Execution(Execution&& other) { *this = std::move(other); } 404 Execution& operator=(Execution&& other) { 405 if (this != &other) { 406 ANeuralNetworksExecution_free(mExecution); 407 mExecution = other.mExecution; 408 other.mExecution = nullptr; 409 } 410 return *this; 411 } 412 413 Result setInput(uint32_t index, const void* buffer, size_t length, 414 const ANeuralNetworksOperandType* type = nullptr) { 415 return static_cast<Result>( 416 ANeuralNetworksExecution_setInput(mExecution, index, type, buffer, length)); 417 } 418 419 Result setInputFromMemory(uint32_t index, const Memory* memory, uint32_t offset, 420 uint32_t length, const ANeuralNetworksOperandType* type = nullptr) { 421 return static_cast<Result>(ANeuralNetworksExecution_setInputFromMemory( 422 mExecution, index, type, memory->get(), offset, length)); 423 } 424 425 Result setOutput(uint32_t index, void* buffer, size_t length, 426 const ANeuralNetworksOperandType* type = nullptr) { 427 return static_cast<Result>( 428 ANeuralNetworksExecution_setOutput(mExecution, index, type, buffer, length)); 429 } 430 431 Result setOutputFromMemory(uint32_t index, const Memory* memory, uint32_t offset, 432 uint32_t length, const ANeuralNetworksOperandType* type = nullptr) { 433 return static_cast<Result>(ANeuralNetworksExecution_setOutputFromMemory( 434 mExecution, index, type, memory->get(), offset, length)); 435 } 436 startCompute(Event * event)437 Result startCompute(Event* event) { 438 ANeuralNetworksEvent* ev = nullptr; 439 Result result = static_cast<Result>(ANeuralNetworksExecution_startCompute(mExecution, &ev)); 440 event->set(ev); 441 return result; 442 } 443 compute()444 Result compute() { return static_cast<Result>(ANeuralNetworksExecution_compute(mExecution)); } 445 getOutputOperandDimensions(uint32_t index,std::vector<uint32_t> * dimensions)446 Result getOutputOperandDimensions(uint32_t index, std::vector<uint32_t>* dimensions) { 447 uint32_t rank = 0; 448 Result result = static_cast<Result>( 449 ANeuralNetworksExecution_getOutputOperandRank(mExecution, index, &rank)); 450 dimensions->resize(rank); 451 if ((result != Result::NO_ERROR && result != Result::OUTPUT_INSUFFICIENT_SIZE) || 452 rank == 0) { 453 return result; 454 } 455 result = static_cast<Result>(ANeuralNetworksExecution_getOutputOperandDimensions( 456 mExecution, index, dimensions->data())); 457 return result; 458 } 459 460 private: 461 ANeuralNetworksExecution* mExecution = nullptr; 462 }; 463 464 } // namespace wrapper 465 } // namespace nn 466 } // namespace android 467 468 #endif // ANDROID_ML_NN_RUNTIME_NEURAL_NETWORKS_WRAPPER_H 469