1 /* 2 * Copyright (C) 2020 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef ANDROID_PACKAGES_MODULES_NEURALNETWORKS_COMMON_TYPES_NNAPI_TYPES_H 18 #define ANDROID_PACKAGES_MODULES_NEURALNETWORKS_COMMON_TYPES_NNAPI_TYPES_H 19 20 #include <android-base/chrono_utils.h> 21 #include <android-base/expected.h> 22 #include <android-base/unique_fd.h> 23 24 #include <array> 25 #include <chrono> 26 #include <limits> 27 #include <memory> 28 #include <optional> 29 #include <string> 30 #include <type_traits> 31 #include <utility> 32 #include <variant> 33 #include <vector> 34 35 #include "nnapi/OperandTypes.h" 36 #include "nnapi/OperationTypes.h" 37 #include "nnapi/Result.h" 38 39 // Forward declare AHardwareBuffer 40 extern "C" typedef struct AHardwareBuffer AHardwareBuffer; 41 42 namespace android::nn { 43 44 // Forward declarations 45 46 class IBuffer; 47 class IBurst; 48 class IDevice; 49 class IExecution; 50 class IPreparedModel; 51 struct Memory; 52 53 // Constants 54 55 constexpr float kDefaultExecTime = std::numeric_limits<float>::max(); 56 constexpr float kDefaultPowerUsage = std::numeric_limits<float>::max(); 57 constexpr uint32_t kByteSizeOfCacheToken = 32; 58 constexpr uint32_t kMaxNumberOfCacheFiles = 32; 59 60 /** 61 * Numeric values of extension operand and operation types have the 62 * following structure: 63 * - 16 high bits represent the "prefix", which corresponds uniquely to the 64 * extension name. 65 * - 16 low bits represent the type ID within the extension. 66 */ 67 constexpr uint8_t kExtensionTypeBits = 16; 68 constexpr uint8_t kExtensionPrefixBits = 16; 69 constexpr uint32_t kTypeWithinExtensionMask = 0xFFFF; 70 71 constexpr uint32_t kDefaultRequestMemoryAlignment = 64; 72 constexpr uint32_t kDefaultRequestMemoryPadding = 64; 73 constexpr uint32_t kMinMemoryAlignment = alignof(std::max_align_t); 74 constexpr uint32_t kMinMemoryPadding = 1; 75 constexpr auto kLoopTimeoutDefault = std::chrono::seconds{2}; 76 constexpr auto kLoopTimeoutMaximum = std::chrono::seconds{15}; 77 78 // Aliases 79 80 using SharedBuffer = std::shared_ptr<const IBuffer>; 81 using SharedBurst = std::shared_ptr<const IBurst>; 82 using SharedDevice = std::shared_ptr<const IDevice>; 83 using SharedExecution = std::shared_ptr<const IExecution>; 84 using SharedMemory = std::shared_ptr<const Memory>; 85 using SharedPreparedModel = std::shared_ptr<const IPreparedModel>; 86 87 // Canonical types 88 89 /** 90 * Status of a device. 91 */ 92 enum class DeviceStatus { 93 AVAILABLE = 0, 94 BUSY = 1, 95 OFFLINE = 2, 96 UNKNOWN = 3, 97 }; 98 99 /** 100 * Execution preferences. 101 */ 102 enum class ExecutionPreference { 103 /** 104 * Prefer executing in a way that minimizes battery drain. 105 * This is desirable for compilations that will be executed often. 106 */ 107 LOW_POWER = 0, 108 /** 109 * Prefer returning a single answer as fast as possible, even if this causes 110 * more power consumption. 111 */ 112 FAST_SINGLE_ANSWER = 1, 113 /** 114 * Prefer maximizing the throughput of successive frames, for example when 115 * processing successive frames coming from the camera. 116 */ 117 SUSTAINED_SPEED = 2, 118 DEFAULT = FAST_SINGLE_ANSWER, 119 }; 120 121 /** 122 * Device types. 123 * 124 * The type of NNAPI device. 125 */ 126 enum class DeviceType { 127 /** The device type cannot be provided. */ 128 UNKNOWN = 0, 129 /** The device does not fall into any category below. */ 130 OTHER = 1, 131 /** The device runs NNAPI models on single or multi-core CPU. */ 132 CPU = 2, 133 /** The device can run NNAPI models and also accelerate graphics APIs such 134 * as OpenGL ES and Vulkan. */ 135 GPU = 3, 136 /** Dedicated accelerator for Machine Learning workloads. */ 137 ACCELERATOR = 4, 138 }; 139 140 /** 141 * Specifies whether or not to measure timing information during execution. 142 */ 143 enum class MeasureTiming { 144 NO = 0, 145 YES = 1, 146 }; 147 148 /** 149 * Priority given to a prepared model for execution. 150 */ 151 enum class Priority { 152 LOW = 0, 153 MEDIUM = 1, 154 HIGH = 2, 155 DEFAULT = MEDIUM, 156 }; 157 158 // TODO: Should more errors from NeuralNetworks.h be incorporated? The left name shows errors that 159 // appear in NeuralNetworks.h but not in the HAL, and the right column shows what these values could 160 // map to: 161 // * OUT_OF_MEMORY ==> GENERAL_FAILURE / RESOURCE_EXHAUSTED_* 162 // * INCOMPLETE ==> GENERAL_FAILURE 163 // * UNEXPECTED_NULL ==> INVALID_ARGUMENT 164 // * UNMAPPABLE ==> GENERAL_FAILURE 165 // * BAD_STATE ==> INVALID_ARGUMENT 166 enum class ErrorStatus { 167 NONE = 0, 168 DEVICE_UNAVAILABLE = 1, 169 GENERAL_FAILURE = 2, 170 OUTPUT_INSUFFICIENT_SIZE = 3, 171 INVALID_ARGUMENT = 4, 172 MISSED_DEADLINE_TRANSIENT = 5, 173 MISSED_DEADLINE_PERSISTENT = 6, 174 RESOURCE_EXHAUSTED_TRANSIENT = 7, 175 RESOURCE_EXHAUSTED_PERSISTENT = 8, 176 DEAD_OBJECT = 10000, 177 }; 178 179 struct GeneralError { 180 // NOLINTNEXTLINE(google-explicit-constructor) 181 /*implicit*/ GeneralError(std::string message = {}, 182 ErrorStatus code = ErrorStatus::GENERAL_FAILURE); 183 184 std::string message; 185 ErrorStatus code; 186 }; 187 188 template <typename Type> 189 using GeneralResult = base::expected<Type, GeneralError>; 190 191 /** 192 * Fused activation function types. 193 */ 194 enum class FusedActivationFunc : int32_t { 195 /** NO fused activation function. */ 196 NONE = 0, 197 /** Fused ReLU activation function. */ 198 RELU = 1, 199 /** Fused ReLU1 activation function. */ 200 RELU1 = 2, 201 /** Fused ReLU6 activation function. */ 202 RELU6 = 3, 203 }; 204 205 using Dimension = uint32_t; 206 using Dimensions = std::vector<Dimension>; 207 208 using CacheToken = std::array<uint8_t, kByteSizeOfCacheToken>; 209 210 /** 211 * Describes the shape information of an output operand after execution. 212 */ 213 struct OutputShape { 214 /** 215 * Dimensions of the operand. 216 */ 217 std::vector<uint32_t> dimensions; 218 219 /** 220 * Whether the provided buffer size is sufficient for the output. 221 */ 222 bool isSufficient = false; 223 }; 224 225 struct ExecutionError { 226 // NOLINTNEXTLINE(google-explicit-constructor) 227 /*implicit*/ ExecutionError(std::string message = {}, 228 ErrorStatus code = ErrorStatus::GENERAL_FAILURE, 229 std::vector<OutputShape> outputShapes = {}); 230 231 // NOLINTNEXTLINE(google-explicit-constructor) 232 /*implicit*/ ExecutionError(GeneralError error); 233 234 std::string message; 235 ErrorStatus code; 236 // OutputShapes for code == OUTPUT_INSUFFICIENT_SIZE 237 std::vector<OutputShape> outputShapes; 238 }; 239 240 template <typename Type> 241 using ExecutionResult = base::expected<Type, ExecutionError>; 242 243 /** 244 * The capabilities of a driver. 245 * 246 * This represents performance of non-extension operations. 247 * 248 * Performance of an operation other than {@link OperationType::IF} and 249 * {@link OperationType::WHILE} comes from the type of its first operand. 250 */ 251 struct Capabilities { 252 /** 253 * Performance information for the reference workload. 254 * 255 * Used by a driver to report its performance characteristics. 256 */ 257 struct PerformanceInfo { 258 /** 259 * Ratio of the time taken by the driver to execute the 260 * workload compared to the time the CPU would take for the 261 * same workload. A lower number is better. 262 */ 263 float execTime = kDefaultExecTime; 264 265 /** 266 * Ratio of the energy used by the driver compared to what 267 * the CPU would use for doing the same workload. A lower number 268 * is better. 269 */ 270 float powerUsage = kDefaultPowerUsage; 271 }; 272 273 /** 274 * Driver performance when operating on a particular data type. 275 * In the case of float32 data, this is used when the calculations 276 * are not relaxed. 277 */ 278 struct OperandPerformance { 279 OperandType type{}; 280 PerformanceInfo info; 281 }; 282 283 class OperandPerformanceTable { 284 public: 285 static Result<OperandPerformanceTable> create( 286 std::vector<OperandPerformance> operandPerformances); 287 288 PerformanceInfo lookup(OperandType type) const; 289 const std::vector<OperandPerformance>& asVector() const; 290 291 private: 292 explicit OperandPerformanceTable(std::vector<OperandPerformance> operandPerformances); 293 std::vector<OperandPerformance> mSorted; 294 }; 295 296 /** 297 * Driver performance when operating on float32 data but performing 298 * calculations with range and/or precision as low as that of the IEEE 299 * 754 16-bit floating-point format. 300 */ 301 PerformanceInfo relaxedFloat32toFloat16PerformanceScalar; 302 PerformanceInfo relaxedFloat32toFloat16PerformanceTensor; 303 304 /** 305 * Performance by operand type. Must be sorted by OperandType. 306 * 307 * If a particular {@link OperandType} is not present in operandPerformance, 308 * its performance is treated as 309 * { .execTime = FLT_MAX, .powerUsage = FLT_MAX }. 310 * 311 * Performance does not apply to {@link OperandType::SUBGRAPH}, and a driver 312 * must not report operand performance for {@link OperandType::SUBGRAPH}. 313 */ 314 OperandPerformanceTable operandPerformance; 315 316 /** 317 * Performance of an {@link OperationType::IF} operation is the sum of 318 * {@link Capabilities::ifPerformance} and the mean of performance for the 319 * two branch subgraphs, where performance for a subgraph is the sum of the 320 * performance of all operations within the subgraph. 321 */ 322 PerformanceInfo ifPerformance; 323 324 /** 325 * Performance of a {@link OperationType::WHILE} operation is the sum of 326 * {@link Capabilities::whilePerformance}, performance for the condition 327 * subgraph and performance for the body subgraph, where performance for a 328 * subgraph is the sum of the performance of all operations within the 329 * subgraph. 330 */ 331 PerformanceInfo whilePerformance; 332 }; 333 334 /** 335 * Information about an extension. 336 */ 337 struct Extension { 338 /** 339 * Information about an extension operand type. 340 */ 341 struct OperandTypeInformation { 342 /** 343 * The extension operand type. 344 */ 345 uint16_t type = 0; 346 347 /** 348 * Indicates whether the extension operand type represents a tensor or 349 * a scalar. 350 */ 351 bool isTensor = false; 352 353 /** 354 * The byte size of the operand (if scalar) or of a single element (if 355 * tensor). 356 */ 357 uint32_t byteSize = 0; 358 }; 359 360 /** 361 * The extension name. 362 * 363 * The name must consist of lowercase latin letters, numbers, periods, and 364 * underscore signs. The name must contain at least one period. 365 * 366 * The name must start with the reverse domain name of the vendor. 367 * 368 * Example: com.google.test_extension 369 */ 370 std::string name; 371 372 /** 373 * Information about operand types defined by the extension. 374 */ 375 std::vector<OperandTypeInformation> operandTypes; 376 }; 377 378 /** 379 * Describes one operation of the model's graph. 380 */ 381 struct Operation { 382 /** 383 * The operation type. 384 */ 385 OperationType type{}; 386 387 /** 388 * Describes the table that contains the indexes of the inputs of the 389 * operation. The offset is the index in the operandIndexes table. 390 */ 391 std::vector<uint32_t> inputs; 392 393 /** 394 * Describes the table that contains the indexes of the outputs of the 395 * operation. The offset is the index in the operandIndexes table. 396 */ 397 std::vector<uint32_t> outputs; 398 }; 399 400 /** 401 * Describes the location of a data object. 402 */ 403 struct DataLocation { 404 /** 405 * The address of the memory where the data is found. 406 * 407 * This field is only active when lifetime is POINTER. 408 */ 409 std::variant<const void*, void*> pointer; 410 411 /** 412 * The index of the memory pool where this location is found. 413 */ 414 uint32_t poolIndex = 0; 415 416 /** 417 * Offset in bytes from the start of the pool. 418 */ 419 uint32_t offset = 0; 420 421 /** 422 * The length of the data in bytes. 423 */ 424 uint32_t length = 0; 425 426 /** 427 * The end padding of the specified memory region in bytes. 428 */ 429 uint32_t padding = 0; 430 }; 431 432 /** 433 * Describes one operand of the model's graph. 434 */ 435 struct Operand { 436 /** 437 * How an operand is used. 438 */ 439 enum class LifeTime { 440 /** 441 * The operand is internal to the model. It's created by an operation and 442 * consumed by other operations. It must be an output operand of 443 * exactly one operation. 444 */ 445 TEMPORARY_VARIABLE = 0, 446 447 /** 448 * The operand is an input of a subgraph. It must not be an output 449 * operand of any operation. 450 * 451 * An operand can't be both input and output of a subgraph. 452 */ 453 SUBGRAPH_INPUT = 1, 454 455 /** 456 * The operand is an output of a subgraph. It must be an output 457 * operand of exactly one operation. 458 * 459 * An operand can't be both input and output of a subgraph. 460 */ 461 SUBGRAPH_OUTPUT = 2, 462 463 /** 464 * The operand is a constant found in Model::operandValues. It must 465 * not be an output operand of any operation. 466 */ 467 CONSTANT_COPY = 3, 468 469 /** 470 * The operand is a constant that was specified via a Memory 471 * object. It must not be an output operand of any operation. 472 */ 473 CONSTANT_REFERENCE = 4, 474 475 /** 476 * The operand does not have a value. This is valid only for optional 477 * arguments of operations. 478 */ 479 NO_VALUE = 5, 480 481 /** 482 * The operand is a reference to a subgraph. It must be an input to one 483 * or more {@link OperationType::IF} or {@link OperationType::WHILE} 484 * operations. 485 */ 486 SUBGRAPH = 6, 487 488 /** 489 * This operand is a constant found in a user buffer. It must not be an 490 * output operand of any operation. 491 */ 492 POINTER = 7, 493 }; 494 495 /** 496 * No additional parameters. 497 */ 498 using NoParams = std::monostate; 499 500 /** 501 * Parameters for TENSOR_QUANT8_SYMM_PER_CHANNEL operand. 502 */ 503 struct SymmPerChannelQuantParams { 504 /** Array of scaling values for each channel. Each value must be greater than zero. */ 505 std::vector<float> scales; 506 /** Index of the channel dimension */ 507 uint32_t channelDim = 0; 508 }; 509 510 /** 511 * Extension operand parameters. 512 * 513 * The framework treats this as an opaque data blob. 514 * The format is up to individual extensions. 515 */ 516 using ExtensionParams = std::vector<uint8_t>; 517 518 /** 519 * Additional parameters specific to a particular operand type. 520 */ 521 using ExtraParams = std::variant<NoParams, SymmPerChannelQuantParams, ExtensionParams>; 522 523 /** 524 * The data type. 525 * 526 * Besides the values listed in {@link OperationType}, any value equal or over 527 * (1 << kExtensionTypeBits) is possible and should be interpreted 528 * as an extension type according to {@link Model::extensionNameToPrefix}. 529 */ 530 OperandType type{}; 531 532 /** 533 * Dimensions of the operand. 534 * 535 * For a scalar operand, dimensions.size() must be 0. 536 * 537 * A tensor operand with all dimensions specified has "fully 538 * specified" dimensions. Whenever possible (i.e., whenever the 539 * dimensions are known at model construction time), a tensor 540 * operand should have (but is not required to have) fully 541 * specified dimensions, in order to enable the best possible 542 * performance. 543 * 544 * If a tensor operand's dimensions are not fully specified, the 545 * dimensions of the operand are deduced from the operand 546 * dimensions and values of the operation for which that operand 547 * is an output or from the corresponding {@link OperationType::IF} or 548 * {@link OperationType::WHILE} operation input operand dimensions in the 549 * case of referenced subgraph input operands. 550 * 551 * In the following situations, a tensor operand's dimensions must 552 * be fully specified: 553 * 554 * - The operand has lifetime CONSTANT_COPY, CONSTANT_REFERENCE, or 555 * POINTER. 556 * 557 * - The operand has lifetime SUBGRAPH_INPUT and belongs to the main 558 * subgraph. Fully specified dimensions must either be present in the 559 * Operand or they must be provided in the corresponding 560 * RequestArgument. 561 * EXCEPTION: If the input is optional and omitted 562 * (by setting the hasNoValue field of the corresponding 563 * RequestArgument to true) then it need not have fully 564 * specified dimensions. 565 * 566 * A tensor operand with some number of unspecified dimensions is 567 * represented by setting each unspecified dimension to 0. 568 * 569 * A tensor operand with unspecified rank is represented by providing 570 * an empty dimensions vector. 571 */ 572 Dimensions dimensions; 573 574 /** 575 * Quantized scale of the operand. 576 * 577 * Must be 0 when not applicable to an operand type. 578 * 579 * See {@link OperandType}. 580 */ 581 float scale = 0.0f; 582 583 /** 584 * Quantized zero-point offset of the operand. 585 * 586 * Must be 0 when not applicable to an operand type. 587 * 588 * See {@link OperandType}. 589 */ 590 int32_t zeroPoint = 0; 591 592 /** 593 * How the operand is used. 594 */ 595 LifeTime lifetime{}; 596 597 /** 598 * Where to find the data for this operand. 599 * If the lifetime is TEMPORARY_VARIABLE, SUBGRAPH_INPUT, SUBGRAPH_OUTPUT, 600 * or NO_VALUE: 601 * - All the fields must be 0. 602 * If the lifetime is CONSTANT_COPY: 603 * - location.pointer is null. 604 * - location.poolIndex is 0. 605 * - location.offset is the offset in bytes into Model::operandValues. 606 * - location.length is set. 607 * - location.padding is 0. 608 * If the lifetime is CONSTANT_REFERENCE: 609 * - location.pointer is null. 610 * - location.poolIndex is set. 611 * - location.offset is the offset in bytes into the specified pool. 612 * - location.length is set. 613 * - location.padding is set. 614 * If the lifetime is SUBGRAPH: 615 * - location.pointer is null. 616 * - location.poolIndex is 0. 617 * - location.offset is the index of the referenced subgraph in 618 * {@link Model::referenced}. 619 * - location.length is 0. 620 * - location.padding is 0. 621 * If the lifetime is POINTER: 622 * - location.pointer is non-null. 623 * - location.poolIndex is 0. 624 * - location.offset is 0. 625 * - location.length is set. 626 * - location.padding is 0. 627 */ 628 DataLocation location; 629 630 /** 631 * Additional parameters specific to a particular operand type. 632 */ 633 ExtraParams extraParams; 634 }; 635 636 using Handle = base::unique_fd; 637 using SharedHandle = std::shared_ptr<const Handle>; 638 639 struct Memory { 640 struct Ashmem { 641 base::unique_fd fd; 642 size_t size; 643 }; 644 645 struct Fd { 646 size_t size; 647 int prot; 648 base::unique_fd fd; 649 size_t offset; 650 }; 651 652 // RAII wrapper for AHardwareBuffer 653 struct HardwareBuffer { 654 using Deleter = std::add_pointer_t<void(AHardwareBuffer*)>; 655 using Handle = std::unique_ptr<AHardwareBuffer, Deleter>; 656 Handle handle; 657 }; 658 659 struct Unknown { 660 struct Handle { 661 std::vector<base::unique_fd> fds; 662 std::vector<int> ints; 663 }; 664 Handle handle; 665 size_t size; 666 std::string name; 667 }; 668 669 std::variant<Ashmem, Fd, HardwareBuffer, Unknown> handle; 670 }; 671 672 /** 673 * The mapping between extension names and prefixes of values like operand and operation type, and 674 * token in {@link TokenValuePair}. 675 * 676 * An operand or operation whose numeric type value is above {@link IDevice::OPERAND_TYPE_BASE_MAX} 677 * or {@link IDevice::OPERATION_TYPE_BASE_MAX} respectively should be interpreted as an extension 678 * operand/operation. The low kExtensionTypeBits bits of the value correspond to the type ID within 679 * the extension and the high kExtensionPrefixBits bits encode the "prefix", which maps uniquely to 680 * the extension name. The sign bit is always 0. 681 * 682 * For example, if a model contains an operation whose value is 0x7AAABBBB and 683 * Model::extensionNameToPrefix contains an entry with prefix=0x7AAA and 684 * name="vendor.test.test_extension", then the operation should be interpreted as the operation 685 * 0xBBBB of the extension named vendor.test.test_extension. 686 * 687 * This is a one-to-one correspondence. That is, there must be at most one prefix corresponding to 688 * each extension name and at most one extension name corresponding to each prefix. 689 */ 690 struct ExtensionNameAndPrefix { 691 /** 692 * The extension name. 693 * 694 * See {@link Extension::name} for the format specification. 695 */ 696 std::string name; 697 698 /** 699 * The extension prefix. Only the lowest 15 bits are used, so the value must be less than 32768. 700 */ 701 uint16_t prefix = 0; 702 }; 703 704 /** 705 * A Neural Network Model. 706 * 707 * This includes not only the execution graph, but also constant data such as 708 * weights or scalars added at construction time. The only information that 709 * may not be known is the shape of the input tensors. 710 */ 711 struct Model { 712 /** 713 * An excerpt of the execution graph. 714 */ 715 struct Subgraph { 716 /** 717 * All operands included in the subgraph. 718 */ 719 std::vector<Operand> operands; 720 721 /** 722 * All operations included in the subgraph. 723 * 724 * The operations are sorted into execution order. Every operand 725 * with lifetime SUBGRAPH_OUTPUT or TEMPORARY_VARIABLE must be 726 * written before it is read. 727 */ 728 std::vector<Operation> operations; 729 730 /** 731 * Input indexes of the subgraph. There must be at least one. 732 * 733 * Each value corresponds to the index of the operand in "operands". 734 */ 735 std::vector<uint32_t> inputIndexes; 736 737 /** 738 * Output indexes of the subgraph. There must be at least one. 739 * 740 * Each value corresponds to the index of the operand in "operands". 741 */ 742 std::vector<uint32_t> outputIndexes; 743 }; 744 745 class OperandValues { 746 public: 747 OperandValues(); 748 OperandValues(const uint8_t* data, size_t length); 749 750 // Append a segment of memory (starting at `data` with `length` number of bytes) to the back 751 // of `OperandValues`, adding padding as necessary so that the appended data is aligned. 752 // Refer to `getAlignmentForLength` for more information on alignment (such as what the 753 // current alignments are for different data lengths). 754 DataLocation append(const uint8_t* data, size_t length); 755 756 const uint8_t* data() const; 757 size_t size() const; 758 759 private: 760 std::vector<uint8_t> mData; 761 }; 762 763 /** 764 * The top-level subgraph. 765 */ 766 Subgraph main; 767 768 /** 769 * Referenced subgraphs. 770 * 771 * Each subgraph is referenced by the main subgraph or at least one other 772 * referenced subgraph. 773 * 774 * There must be no reference cycles. 775 */ 776 std::vector<Subgraph> referenced; 777 778 /** 779 * A byte buffer containing operand data that were copied into the model. 780 * 781 * An operand's value must be located here if and only if Operand::lifetime 782 * equals Operand::LifeTime::CONSTANT_COPY. 783 */ 784 OperandValues operandValues; 785 786 /** 787 * A collection of shared memory pools containing operand values. 788 * 789 * An operand's value must be located here if and only if Operand::lifetime 790 * equals Operand::LifeTime::CONSTANT_REFERENCE. 791 */ 792 std::vector<SharedMemory> pools; 793 794 /** 795 * 'true' indicates TENSOR_FLOAT32 may be calculated with range and/or 796 * precision as low as that of the IEEE 754 16-bit floating-point format. 797 * 'false' indicates TENSOR_FLOAT32 must be calculated using at least the 798 * range and precision of the IEEE 754 32-bit floating-point format. 799 */ 800 bool relaxComputationFloat32toFloat16 = false; 801 802 /** 803 * The mapping between extension names and prefixes of operand and 804 * operation type values. 805 * 806 * An operand or operation whose numeric type value is equal to or greater 807 * than (1 << kExtensionTypeBits) should be interpreted 808 * as an extension operand. The low 809 * {@link kExtensionTypeBits} bits of the value correspond to the type ID 810 * within the extension and the high {@link kExtensionPrefixBits} bits encode 811 * the "prefix", which maps uniquely to the extension name. 812 * 813 * For example, if a model contains an operation whose value is 814 * 0xAAAABBBB and extensionNameToPrefix contains an entry with 815 * prefix=0xAAAA and name="vendor.test.test_extension", then 816 * the operation should be interpreted as the operation 0xBBBB 817 * of the extension named vendor.test.test_extension. 818 * 819 * This is a one-to-one correspondence. That is, there must be at most one 820 * prefix corresponding to each extension name and at most one extension 821 * name corresponding to each prefix. 822 */ 823 std::vector<ExtensionNameAndPrefix> extensionNameToPrefix; 824 }; 825 826 /** 827 * A buffer descriptor. Describes the properties of a buffer. 828 */ 829 struct BufferDesc { 830 /** 831 * Dimensions of the buffer. May have unknown dimensions or rank. A buffer with some number 832 * of unspecified dimensions is represented by setting each unspecified dimension to 0. A 833 * buffer with unspecified rank is represented by providing an empty dimensions vector. 834 */ 835 Dimensions dimensions; 836 }; 837 838 /** 839 * Describes a role of an input or output to a prepared model. 840 */ 841 struct BufferRole { 842 /** 843 * The index of the IPreparedModel within the "preparedModel" argument passed in 844 * IDevice::allocate. 845 */ 846 uint32_t modelIndex = 0; 847 848 /** 849 * The index of the input or output operand. 850 */ 851 uint32_t ioIndex = 0; 852 853 /** 854 * A floating-point value within the range (0.0, 1.0]. Describes how likely the 855 * buffer is to be used in the specified role. This is provided as a hint to 856 * optimize the case when multiple roles prefer different buffer locations or data 857 * layouts. 858 */ 859 float probability = 0.0f; 860 }; 861 862 /** 863 * Inputs to be sent to and outputs to be retrieved from a prepared model. 864 * 865 * A Request serves two primary tasks: 866 * 1) Provides the input and output data to be used when executing the model. 867 * 2) Specifies any updates to the input operand metadata that were left 868 * unspecified at model preparation time. 869 * 870 * An output must not overlap with any other output, with an input, or 871 * with an operand of lifetime CONSTANT_REFERENCE. 872 */ 873 struct Request { 874 /** 875 * Metadata information specifying the location of the input or output data and 876 * any updates to the input or output operand. 877 */ 878 struct Argument { 879 enum class LifeTime { 880 POOL = 0, 881 NO_VALUE = 1, 882 POINTER = 2, 883 }; 884 885 LifeTime lifetime{}; 886 887 /** 888 * The location within one of the memory pools passed in the Request. 889 */ 890 DataLocation location; 891 892 /** 893 * Updated dimension information. 894 * 895 * If dimensions.size() > 0, dimension information was provided 896 * along with the argument. This can be the case for models that 897 * accept inputs of varying size. This can't change the rank, just 898 * the value of the dimensions that were unspecified in the 899 * model. If dimensions.size() > 0, then all dimensions must be 900 * specified here; and any dimension that was specified in the 901 * model must have the same value here. 902 * 903 * If the dimensions in the model are not fully specified, then 904 * they must be fully specified here, unless hasNoValue is set to 905 * true. If the dimensions in the model are fully specified, then 906 * either dimensions.size() may be 0, or the dimensions in the 907 * model must be identical to the dimensions here. 908 */ 909 Dimensions dimensions; 910 }; 911 912 /** 913 * Specifies a driver-managed buffer. It is the token corresponding to an 914 * IBuffer returned from IDevice::allocate, and is specific to the IDevice 915 * object. 916 */ 917 enum class MemoryDomainToken : uint32_t {}; 918 919 /** 920 * A memory pool. 921 */ 922 using MemoryPool = std::variant<SharedMemory, MemoryDomainToken, SharedBuffer>; 923 924 /** 925 * Input data and information to be used in the execution of a prepared 926 * model. 927 * 928 * The index of the input corresponds to the index in Model::main::inputIndexes. 929 * E.g., inputs[i] corresponds to Model::main::inputIndexes[i]. 930 */ 931 std::vector<Argument> inputs; 932 933 /** 934 * Output data and information to be used in the execution of a prepared 935 * model. 936 * 937 * The index of the output corresponds to the index in Model::main::outputIndexes. 938 * E.g., outputs[i] corresponds to Model::main::outputIndexes[i]. 939 */ 940 std::vector<Argument> outputs; 941 942 /** 943 * A collection of memory pools containing operand data for both the 944 * inputs and the outputs to a model. 945 */ 946 std::vector<MemoryPool> pools; 947 }; 948 949 // Representation of sync_fence. 950 class SyncFence { 951 public: 952 static SyncFence createAsSignaled(); 953 static SyncFence create(base::unique_fd fd); 954 static Result<SyncFence> create(SharedHandle syncFence); 955 956 // The function syncWait() has the same semantics as the system function 957 // ::sync_wait(), except that the syncWait() return value is semantically 958 // richer. 959 enum class FenceState { 960 ACTIVE, // fence has not been signaled 961 SIGNALED, // fence has been signaled 962 ERROR, // fence has been placed in the error state 963 UNKNOWN, // either bad argument passed to syncWait(), or internal error 964 }; 965 using Timeout = std::chrono::duration<int, std::milli>; 966 using OptionalTimeout = std::optional<Timeout>; 967 968 FenceState syncWait(OptionalTimeout optionalTimeout) const; 969 970 SharedHandle getSharedHandle() const; 971 bool hasFd() const; 972 int getFd() const; 973 974 private: 975 explicit SyncFence(SharedHandle syncFence); 976 977 SharedHandle mSyncFence; 978 }; 979 980 using Clock = base::boot_clock; 981 982 using Duration = std::chrono::nanoseconds; 983 using OptionalDuration = std::optional<Duration>; 984 985 using TimePoint = std::chrono::time_point<Clock, Duration>; 986 using OptionalTimePoint = std::optional<TimePoint>; 987 988 /** 989 * Timing information measured during execution. Each time is a duration from 990 * the beginning of some task to the end of that task, including time when that 991 * task is not active (for example, preempted by some other task, or 992 * waiting for some resource to become available). 993 * 994 * Times are measured in nanoseconds. 995 */ 996 struct Timing { 997 /** Execution time on device (not driver, which runs on host processor). */ 998 OptionalDuration timeOnDevice; 999 /** Execution time in driver (including time on device). */ 1000 OptionalDuration timeInDriver; 1001 }; 1002 1003 // Returns status, timingLaunched, timingFenced 1004 using ExecuteFencedInfoCallback = std::function<GeneralResult<std::pair<Timing, Timing>>()>; 1005 1006 // Version is a tuple that contains what NNAPI feature level is supported/required and whether 1007 // runtime-only features are supported/required. 1008 struct Version { 1009 enum class Level : uint8_t { 1010 FEATURE_LEVEL_1, 1011 FEATURE_LEVEL_2, 1012 FEATURE_LEVEL_3, 1013 FEATURE_LEVEL_4, 1014 FEATURE_LEVEL_5, 1015 FEATURE_LEVEL_6, 1016 FEATURE_LEVEL_7, 1017 FEATURE_LEVEL_8, 1018 #ifdef NN_EXPERIMENTAL_FEATURE 1019 FEATURE_LEVEL_EXPERIMENTAL, 1020 #endif // NN_EXPERIMENTAL_FEATURE 1021 }; 1022 1023 Level level; 1024 bool runtimeOnlyFeatures = false; 1025 }; 1026 1027 constexpr auto kVersionFeatureLevel1 = Version{.level = Version::Level::FEATURE_LEVEL_1}; 1028 constexpr auto kVersionFeatureLevel2 = Version{.level = Version::Level::FEATURE_LEVEL_2}; 1029 constexpr auto kVersionFeatureLevel3 = Version{.level = Version::Level::FEATURE_LEVEL_3}; 1030 constexpr auto kVersionFeatureLevel4 = Version{.level = Version::Level::FEATURE_LEVEL_4}; 1031 constexpr auto kVersionFeatureLevel5 = Version{.level = Version::Level::FEATURE_LEVEL_5}; 1032 constexpr auto kVersionFeatureLevel6 = Version{.level = Version::Level::FEATURE_LEVEL_6}; 1033 constexpr auto kVersionFeatureLevel7 = Version{.level = Version::Level::FEATURE_LEVEL_7}; 1034 constexpr auto kVersionFeatureLevel8 = Version{.level = Version::Level::FEATURE_LEVEL_8}; 1035 #ifdef NN_EXPERIMENTAL_FEATURE 1036 constexpr auto kVersionFeatureLevelExperimental = 1037 Version{.level = Version::Level::FEATURE_LEVEL_EXPERIMENTAL}; 1038 #endif // NN_EXPERIMENTAL_FEATURE 1039 1040 // Describes the memory preference of an operand. 1041 struct MemoryPreference { 1042 // Must be a power of 2. 1043 // For pointer buffers, the alignment is satisfied if the address of the pointer is a multiple 1044 // of the "alignment" value. For memory pools, the alignment is satisfied if the offset of the 1045 // sub-region specified by DataLocation is a multiple of the "alignment" value. 1046 uint32_t alignment; 1047 // Must be a power of 2. 1048 // For both pointer buffers and memory pools, the padding is satisfied if the padded length is 1049 // greater than or equal to the raw size of the operand (i.e. the size of an element multiplied 1050 // by the number of elements) rounding up to a multiple of the "padding" value. In DataLocation, 1051 // the padded length equals to the sum of the length and padding fields. 1052 uint32_t padding; 1053 }; 1054 1055 /** 1056 * A type that is used to represent a token / byte array data pair. 1057 */ 1058 struct TokenValuePair { 1059 /** 1060 * A 32bit integer token. The token is created by combining the 1061 * extension prefix and enum defined within the extension. Of the 32 bits in the token, the high 1062 * kExtensionPrefixBits bits is the extension prefix and the low kExtensionTypeBits bits 1063 * represents the enum within the extension. 1064 * 1065 * For example, if a token value is 0x7AAA000B and corresponding {@link ExtensionNameAndPrefix} 1066 * contains an entry with prefix=0x7AAA and name="vendor.test.test_extension", then the token 1067 * should be interpreted as the enum value 0x000B of the extension named 1068 * vendor.test.test_extension. 1069 */ 1070 int32_t token; 1071 /** 1072 * A byte array containing the raw data. 1073 */ 1074 std::vector<uint8_t> value; 1075 }; 1076 1077 } // namespace android::nn 1078 1079 #endif // ANDROID_PACKAGES_MODULES_NEURALNETWORKS_COMMON_TYPES_NNAPI_TYPES_H 1080