/*
 * Copyright (C) 2020 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ANDROID_FRAMEWORKS_ML_NN_COMMON_NNAPI_TYPES_H
#define ANDROID_FRAMEWORKS_ML_NN_COMMON_NNAPI_TYPES_H

#include <android-base/chrono_utils.h>
#include <android-base/expected.h>
#include <android-base/unique_fd.h>

#include <array>
#include <chrono>
#include <limits>
#include <memory>
#include <optional>
#include <string>
#include <type_traits>
#include <utility>
#include <variant>
#include <vector>

#include "nnapi/OperandTypes.h"
#include "nnapi/OperationTypes.h"
#include "nnapi/Result.h"

// Forward declare AHardwareBuffer
extern "C" typedef struct AHardwareBuffer AHardwareBuffer;

namespace android::nn {

// Forward declarations

class IBuffer;
class IBurst;
class IDevice;
class IExecution;
class IPreparedModel;
struct Memory;

// Constants

// Default performance values used by Capabilities::PerformanceInfo below; FLT_MAX
// means "worst possible" (see Capabilities::operandPerformance documentation).
constexpr float kDefaultExecTime = std::numeric_limits<float>::max();
constexpr float kDefaultPowerUsage = std::numeric_limits<float>::max();
// Size in bytes of a compilation cache token (see CacheToken alias below).
constexpr uint32_t kByteSizeOfCacheToken = 32;
constexpr uint32_t kMaxNumberOfCacheFiles = 32;

/**
 * Numeric values of extension operand and operation types have the
 * following structure:
 * - 16 high bits represent the "prefix", which corresponds uniquely to the
 *   extension name.
 * - 16 low bits represent the type ID within the extension.
 */
constexpr uint8_t kExtensionTypeBits = 16;
constexpr uint8_t kExtensionPrefixBits = 16;
constexpr uint32_t kTypeWithinExtensionMask = 0xFFFF;

// Default and minimum memory alignment/padding requirements for request
// arguments (see MemoryPreference at the bottom of this file for semantics).
constexpr uint32_t kDefaultRequestMemoryAlignment = 64;
constexpr uint32_t kDefaultRequestMemoryPadding = 64;
constexpr uint32_t kMinMemoryAlignment = alignof(std::max_align_t);
constexpr uint32_t kMinMemoryPadding = 1;
// Default and maximum timeout applied to WHILE loops during execution.
constexpr auto kLoopTimeoutDefault = std::chrono::seconds{2};
constexpr auto kLoopTimeoutMaximum = std::chrono::seconds{15};

// Aliases

using SharedBuffer = std::shared_ptr<const IBuffer>;
using SharedBurst = std::shared_ptr<const IBurst>;
using SharedDevice = std::shared_ptr<const IDevice>;
using SharedExecution = std::shared_ptr<const IExecution>;
using SharedMemory = std::shared_ptr<const Memory>;
using SharedPreparedModel = std::shared_ptr<const IPreparedModel>;

// Canonical types

/**
 * Status of a device.
 */
enum class DeviceStatus {
    AVAILABLE = 0,
    BUSY = 1,
    OFFLINE = 2,
    UNKNOWN = 3,
};

/**
 * Execution preferences.
 */
enum class ExecutionPreference {
    /**
     * Prefer executing in a way that minimizes battery drain.
     * This is desirable for compilations that will be executed often.
     */
    LOW_POWER = 0,
    /**
     * Prefer returning a single answer as fast as possible, even if this causes
     * more power consumption.
     */
    FAST_SINGLE_ANSWER = 1,
    /**
     * Prefer maximizing the throughput of successive frames, for example when
     * processing successive frames coming from the camera.
     */
    SUSTAINED_SPEED = 2,
    DEFAULT = FAST_SINGLE_ANSWER,
};

/**
 * Device types.
 *
 * The type of NNAPI device.
 */
enum class DeviceType {
    /** The device type cannot be provided. */
    UNKNOWN = 0,
    /** The device does not fall into any category below. */
    OTHER = 1,
    /** The device runs NNAPI models on single or multi-core CPU. */
    CPU = 2,
    /** The device can run NNAPI models and also accelerate graphics APIs such
     * as OpenGL ES and Vulkan. */
    GPU = 3,
    /** Dedicated accelerator for Machine Learning workloads. */
    ACCELERATOR = 4,
};

/**
 * Specifies whether or not to measure timing information during execution.
 */
enum class MeasureTiming {
    NO = 0,
    YES = 1,
};

/**
 * Priority given to a prepared model for execution.
 */
enum class Priority {
    LOW = 0,
    MEDIUM = 1,
    HIGH = 2,
    DEFAULT = MEDIUM,
};

// TODO: Should more errors from NeuralNetworks.h be incorporated? The left name shows errors that
// appear in NeuralNetworks.h but not in the HAL, and the right column shows what these values could
// map to:
// * OUT_OF_MEMORY ==> GENERAL_FAILURE / RESOURCE_EXHAUSTED_*
// * INCOMPLETE ==> GENERAL_FAILURE
// * UNEXPECTED_NULL ==> INVALID_ARGUMENT
// * UNMAPPABLE ==> GENERAL_FAILURE
// * BAD_STATE ==> INVALID_ARGUMENT
enum class ErrorStatus {
    NONE = 0,
    DEVICE_UNAVAILABLE = 1,
    GENERAL_FAILURE = 2,
    OUTPUT_INSUFFICIENT_SIZE = 3,
    INVALID_ARGUMENT = 4,
    MISSED_DEADLINE_TRANSIENT = 5,
    MISSED_DEADLINE_PERSISTENT = 6,
    RESOURCE_EXHAUSTED_TRANSIENT = 7,
    RESOURCE_EXHAUSTED_PERSISTENT = 8,
    // Deliberately out-of-band with the HAL values above.
    DEAD_OBJECT = 10000,
};

/**
 * Error payload carried by GeneralResult on failure: a human-readable message
 * plus a canonical status code.
 */
struct GeneralError {
    std::string message;
    ErrorStatus code = ErrorStatus::GENERAL_FAILURE;
};

template <typename Type>
using GeneralResult = base::expected<Type, GeneralError>;

/**
 * Fused activation function types.
 */
enum class FusedActivationFunc : int32_t {
    /** NO fused activation function. */
    NONE = 0,
    /** Fused ReLU activation function. */
    RELU = 1,
    /** Fused ReLU1 activation function. */
    RELU1 = 2,
    /** Fused ReLU6 activation function. */
    RELU6 = 3,
};

using Dimension = uint32_t;
using Dimensions = std::vector<Dimension>;

// Opaque token used to identify compilation cache entries.
using CacheToken = std::array<uint8_t, kByteSizeOfCacheToken>;

/**
 * Describes the shape information of an output operand after execution.
 */
struct OutputShape {
    /**
     * Dimensions of the operand.
     */
    std::vector<uint32_t> dimensions;

    /**
     * Whether the provided buffer size is sufficient for the output.
     */
    bool isSufficient = false;
};

/**
 * Error payload carried by ExecutionResult on failure. In addition to the
 * message and status code, it may carry the actual output shapes when the
 * failure was caused by undersized output buffers.
 */
struct ExecutionError {
    std::string message;
    ErrorStatus code = ErrorStatus::GENERAL_FAILURE;
    // OutputShapes for code == OUTPUT_INSUFFICIENT_SIZE
    std::vector<OutputShape> outputShapes = {};
};

template <typename Type>
using ExecutionResult = base::expected<Type, ExecutionError>;

/**
 * The capabilities of a driver.
 *
 * This represents performance of non-extension operations.
 *
 * Performance of an operation other than {@link OperationType::IF} and
 * {@link OperationType::WHILE} comes from the type of its first operand.
 */
struct Capabilities {
    /**
     * Performance information for the reference workload.
     *
     * Used by a driver to report its performance characteristics.
     */
    struct PerformanceInfo {
        /**
         * Ratio of the time taken by the driver to execute the
         * workload compared to the time the CPU would take for the
         * same workload. A lower number is better.
         */
        float execTime = kDefaultExecTime;

        /**
         * Ratio of the energy used by the driver compared to what
         * the CPU would use for doing the same workload. A lower number
         * is better.
         */
        float powerUsage = kDefaultPowerUsage;
    };

    /**
     * Driver performance when operating on a particular data type.
     * In the case of float32 data, this is used when the calculations
     * are not relaxed.
     */
    struct OperandPerformance {
        OperandType type{};
        PerformanceInfo info;
    };

    /**
     * Table of performance entries, kept sorted by OperandType for lookup.
     * Construct via create(), which validates/sorts the entries.
     */
    class OperandPerformanceTable {
       public:
        static Result<OperandPerformanceTable> create(
                std::vector<OperandPerformance> operandPerformances);

        // Returns the performance info for `type`, or default (worst-case)
        // performance if `type` is not present in the table.
        PerformanceInfo lookup(OperandType type) const;
        const std::vector<OperandPerformance>& asVector() const;

       private:
        explicit OperandPerformanceTable(std::vector<OperandPerformance> operandPerformances);
        std::vector<OperandPerformance> mSorted;
    };

    /**
     * Driver performance when operating on float32 data but performing
     * calculations with range and/or precision as low as that of the IEEE
     * 754 16-bit floating-point format.
     */
    PerformanceInfo relaxedFloat32toFloat16PerformanceScalar;
    PerformanceInfo relaxedFloat32toFloat16PerformanceTensor;

    /**
     * Performance by operand type. Must be sorted by OperandType.
     *
     * If a particular {@link OperandType} is not present in operandPerformance,
     * its performance is treated as
     * { .execTime = FLT_MAX, .powerUsage = FLT_MAX }.
     *
     * Performance does not apply to {@link OperandType::SUBGRAPH}, and a driver
     * must not report operand performance for {@link OperandType::SUBGRAPH}.
     */
    OperandPerformanceTable operandPerformance;

    /**
     * Performance of an {@link OperationType::IF} operation is the sum of
     * {@link Capabilities::ifPerformance} and the mean of performance for the
     * two branch subgraphs, where performance for a subgraph is the sum of the
     * performance of all operations within the subgraph.
     */
    PerformanceInfo ifPerformance;

    /**
     * Performance of a {@link OperationType::WHILE} operation is the sum of
     * {@link Capabilities::whilePerformance}, performance for the condition
     * subgraph and performance for the body subgraph, where performance for a
     * subgraph is the sum of the performance of all operations within the
     * subgraph.
     */
    PerformanceInfo whilePerformance;
};

/**
 * Information about an extension.
 */
struct Extension {
    /**
     * Information about an extension operand type.
     */
    struct OperandTypeInformation {
        /**
         * The extension operand type.
         */
        uint16_t type = 0;

        /**
         * Indicates whether the extension operand type represents a tensor or
         * a scalar.
         */
        bool isTensor = false;

        /**
         * The byte size of the operand (if scalar) or of a single element (if
         * tensor).
         */
        uint32_t byteSize = 0;
    };

    /**
     * The extension name.
     *
     * The name must consist of lowercase latin letters, numbers, periods, and
     * underscore signs. The name must contain at least one period.
     *
     * The name must start with the reverse domain name of the vendor.
     *
     * Example: com.google.test_extension
     */
    std::string name;

    /**
     * Information about operand types defined by the extension.
     */
    std::vector<OperandTypeInformation> operandTypes;
};

/**
 * Describes one operation of the model's graph.
 */
struct Operation {
    /**
     * The operation type.
     */
    OperationType type{};

    /**
     * Describes the table that contains the indexes of the inputs of the
     * operation. The offset is the index in the operandIndexes table.
     */
    std::vector<uint32_t> inputs;

    /**
     * Describes the table that contains the indexes of the outputs of the
     * operation. The offset is the index in the operandIndexes table.
     */
    std::vector<uint32_t> outputs;
};

/**
 * Describes the location of a data object.
 */
struct DataLocation {
    /**
     * The address of the memory where the data is found.
     *
     * This field is only active when lifetime is POINTER.
     */
    std::variant<const void*, void*> pointer;

    /**
     * The index of the memory pool where this location is found.
     */
    uint32_t poolIndex = 0;

    /**
     * Offset in bytes from the start of the pool.
     */
    uint32_t offset = 0;

    /**
     * The length of the data in bytes.
     */
    uint32_t length = 0;

    /**
     * The end padding of the specified memory region in bytes.
     */
    uint32_t padding = 0;
};

/**
 * Describes one operand of the model's graph.
 */
struct Operand {
    /**
     * How an operand is used.
     */
    enum class LifeTime {
        /**
         * The operand is internal to the model. It's created by an operation and
         * consumed by other operations. It must be an output operand of
         * exactly one operation.
         */
        TEMPORARY_VARIABLE = 0,

        /**
         * The operand is an input of a subgraph. It must not be an output
         * operand of any operation.
         *
         * An operand can't be both input and output of a subgraph.
         */
        SUBGRAPH_INPUT = 1,

        /**
         * The operand is an output of a subgraph. It must be an output
         * operand of exactly one operation.
         *
         * An operand can't be both input and output of a subgraph.
         */
        SUBGRAPH_OUTPUT = 2,

        /**
         * The operand is a constant found in Model::operandValues. It must
         * not be an output operand of any operation.
         */
        CONSTANT_COPY = 3,

        /**
         * The operand is a constant that was specified via a Memory
         * object. It must not be an output operand of any operation.
         */
        CONSTANT_REFERENCE = 4,

        /**
         * The operand does not have a value. This is valid only for optional
         * arguments of operations.
         */
        NO_VALUE = 5,

        /**
         * The operand is a reference to a subgraph. It must be an input to one
         * or more {@link OperationType::IF} or {@link OperationType::WHILE}
         * operations.
         */
        SUBGRAPH = 6,

        /**
         * This operand is a constant found in a user buffer. It must not be an
         * output operand of any operation.
         */
        POINTER = 7,
    };

    /**
     * No additional parameters.
     */
    using NoParams = std::monostate;

    /**
     * Parameters for TENSOR_QUANT8_SYMM_PER_CHANNEL operand.
     */
    struct SymmPerChannelQuantParams {
        /** Array of scaling values for each channel. Each value must be greater than zero. */
        std::vector<float> scales;
        /** Index of the channel dimension */
        uint32_t channelDim = 0;
    };

    /**
     * Extension operand parameters.
     *
     * The framework treats this as an opaque data blob.
     * The format is up to individual extensions.
     */
    using ExtensionParams = std::vector<uint8_t>;

    /**
     * Additional parameters specific to a particular operand type.
     */
    using ExtraParams = std::variant<NoParams, SymmPerChannelQuantParams, ExtensionParams>;

    /**
     * The data type.
     *
     * Besides the values listed in {@link OperandType}, any value equal or over
     * (1 << kExtensionTypeBits) is possible and should be interpreted
     * as an extension type according to {@link Model::extensionNameToPrefix}.
     */
    OperandType type{};

    /**
     * Dimensions of the operand.
     *
     * For a scalar operand, dimensions.size() must be 0.
     *
     * A tensor operand with all dimensions specified has "fully
     * specified" dimensions. Whenever possible (i.e., whenever the
     * dimensions are known at model construction time), a tensor
     * operand should have (but is not required to have) fully
     * specified dimensions, in order to enable the best possible
     * performance.
     *
     * If a tensor operand's dimensions are not fully specified, the
     * dimensions of the operand are deduced from the operand
     * dimensions and values of the operation for which that operand
     * is an output or from the corresponding {@link OperationType::IF} or
     * {@link OperationType::WHILE} operation input operand dimensions in the
     * case of referenced subgraph input operands.
     *
     * In the following situations, a tensor operand's dimensions must
     * be fully specified:
     *
     *     . The operand has lifetime CONSTANT_COPY, CONSTANT_REFERENCE, or
     *       POINTER.
     *
     *     . The operand has lifetime SUBGRAPH_INPUT and belongs to the main
     *       subgraph. Fully specified dimensions must either be present in the
     *       Operand or they must be provided in the corresponding
     *       RequestArgument.
     *       EXCEPTION: If the input is optional and omitted
     *       (by setting the hasNoValue field of the corresponding
     *       RequestArgument to true) then it need not have fully
     *       specified dimensions.
     *
     * A tensor operand with some number of unspecified dimensions is
     * represented by setting each unspecified dimension to 0.
     *
     * A tensor operand with unspecified rank is represented by providing
     * an empty dimensions vector.
     */
    Dimensions dimensions;

    /**
     * Quantized scale of the operand.
     *
     * Must be 0 when not applicable to an operand type.
     *
     * See {@link OperandType}.
     */
    float scale = 0.0f;

    /**
     * Quantized zero-point offset of the operand.
     *
     * Must be 0 when not applicable to an operand type.
     *
     * See {@link OperandType}.
     */
    int32_t zeroPoint = 0;

    /**
     * How the operand is used.
     */
    LifeTime lifetime{};

    /**
     * Where to find the data for this operand.
     * If the lifetime is TEMPORARY_VARIABLE, SUBGRAPH_INPUT, SUBGRAPH_OUTPUT,
     * or NO_VALUE:
     * - All the fields must be 0.
     * If the lifetime is CONSTANT_COPY:
     * - location.pointer is null.
     * - location.poolIndex is 0.
     * - location.offset is the offset in bytes into Model::operandValues.
     * - location.length is set.
     * - location.padding is 0.
     * If the lifetime is CONSTANT_REFERENCE:
     * - location.pointer is null.
     * - location.poolIndex is set.
     * - location.offset is the offset in bytes into the specified pool.
     * - location.length is set.
     * - location.padding is set.
     * If the lifetime is SUBGRAPH:
     * - location.pointer is null.
     * - location.poolIndex is 0.
     * - location.offset is the index of the referenced subgraph in
     *   {@link Model::referenced}.
     * - location.length is 0.
     * - location.padding is 0.
     * If the lifetime is POINTER:
     * - location.pointer is non-null.
     * - location.poolIndex is 0.
     * - location.offset is 0.
     * - location.length is set.
     * - location.padding is 0.
     */
    DataLocation location;

    /**
     * Additional parameters specific to a particular operand type.
     */
    ExtraParams extraParams;
};

/**
 * A native handle: a collection of file descriptors plus opaque integer
 * metadata (mirrors the layout of the system native_handle).
 */
struct Handle {
    std::vector<base::unique_fd> fds;
    std::vector<int> ints;
};

using SharedHandle = std::shared_ptr<const Handle>;

/**
 * A region of shared memory, represented as one of several backing kinds.
 */
struct Memory {
    /** Memory backed by an ashmem region. */
    struct Ashmem {
        base::unique_fd fd;
        size_t size;
    };

    /** Memory backed by a plain mmap-able file descriptor. */
    struct Fd {
        size_t size;
        // mmap protection flags (e.g. PROT_READ | PROT_WRITE).
        int prot;
        base::unique_fd fd;
        size_t offset;
    };

    // RAII wrapper for AHardwareBuffer
    struct HardwareBuffer {
        using Deleter = std::add_pointer_t<void(AHardwareBuffer*)>;
        using Handle = std::unique_ptr<AHardwareBuffer, Deleter>;
        Handle handle;
    };

    /** Memory of a vendor-specific kind, identified by `name`. */
    struct Unknown {
        Handle handle;
        size_t size;
        std::string name;
    };

    std::variant<Ashmem, Fd, HardwareBuffer, Unknown> handle;
};

/**
 * A Neural Network Model.
 *
 * This includes not only the execution graph, but also constant data such as
 * weights or scalars added at construction time. The only information that
 * may not be known is the shape of the input tensors.
 */
struct Model {
    /**
     * An excerpt of the execution graph.
     */
    struct Subgraph {
        /**
         * All operands included in the subgraph.
         */
        std::vector<Operand> operands;

        /**
         * All operations included in the subgraph.
         *
         * The operations are sorted into execution order. Every operand
         * with lifetime SUBGRAPH_OUTPUT or TEMPORARY_VARIABLE must be
         * written before it is read.
         */
        std::vector<Operation> operations;

        /**
         * Input indexes of the subgraph. There must be at least one.
         *
         * Each value corresponds to the index of the operand in "operands".
         */
        std::vector<uint32_t> inputIndexes;

        /**
         * Output indexes of the subgraph. There must be at least one.
         *
         * Each value corresponds to the index of the operand in "operands".
         */
        std::vector<uint32_t> outputIndexes;
    };

    /**
     * Backing store for CONSTANT_COPY operand data, with aligned append.
     */
    class OperandValues {
       public:
        OperandValues();
        OperandValues(const uint8_t* data, size_t length);

        // Append a segment of memory (starting at `data` with `length` number of bytes) to the back
        // of `OperandValues`, adding padding as necessary so that the appended data is aligned.
        // Refer to `getAlignmentForLength` for more information on alignment (such as what the
        // current alignments are for different data lengths).
        DataLocation append(const uint8_t* data, size_t length);

        const uint8_t* data() const;
        size_t size() const;

       private:
        std::vector<uint8_t> mData;
    };

    /**
     * A correspondence between an extension name and a prefix of operand and
     * operation type values.
     */
    struct ExtensionNameAndPrefix {
        /**
         * The extension name.
         *
         * See {@link Extension::name} for the format specification.
         */
        std::string name;

        /**
         * The unique extension identifier within the model.
         *
         * See {@link Model::extensionNameToPrefix}.
         */
        uint16_t prefix = 0;
    };

    /**
     * The top-level subgraph.
     */
    Subgraph main;

    /**
     * Referenced subgraphs.
     *
     * Each subgraph is referenced by the main subgraph or at least one other
     * referenced subgraph.
     *
     * There must be no reference cycles.
     */
    std::vector<Subgraph> referenced;

    /**
     * A byte buffer containing operand data that were copied into the model.
     *
     * An operand's value must be located here if and only if Operand::lifetime
     * equals Operand::LifeTime::CONSTANT_COPY.
     */
    OperandValues operandValues;

    /**
     * A collection of shared memory pools containing operand values.
     *
     * An operand's value must be located here if and only if Operand::lifetime
     * equals Operand::LifeTime::CONSTANT_REFERENCE.
     */
    std::vector<SharedMemory> pools;

    /**
     * 'true' indicates TENSOR_FLOAT32 may be calculated with range and/or
     * precision as low as that of the IEEE 754 16-bit floating-point format.
     * 'false' indicates TENSOR_FLOAT32 must be calculated using at least the
     * range and precision of the IEEE 754 32-bit floating-point format.
     */
    bool relaxComputationFloat32toFloat16 = false;

    /**
     * The mapping between extension names and prefixes of operand and
     * operation type values.
     *
     * An operand or operation whose numeric type value is equal to or greater
     * than (1 << kExtensionTypeBits) should be interpreted
     * as an extension operand. The low
     * {@link kExtensionTypeBits} bits of the value correspond to the type ID
     * within the extension and the high {@link kExtensionPrefixBits} bits encode
     * the "prefix", which maps uniquely to the extension name.
     *
     * For example, if a model contains an operation whose value is
     * 0xAAAABBBB and extensionNameToPrefix contains an entry with
     * prefix=0xAAAA and name="vendor.test.test_extension", then
     * the operation should be interpreted as the operation 0xBBBB
     * of the extension named vendor.test.test_extension.
     *
     * This is a one-to-one correspondence. That is, there must be at most one
     * prefix corresponding to each extension name and at most one extension
     * name corresponding to each prefix.
     */
    std::vector<ExtensionNameAndPrefix> extensionNameToPrefix;
};

/**
 * A buffer descriptor. Describes the properties of a buffer.
 */
struct BufferDesc {
    /**
     * Dimensions of the buffer. May have unknown dimensions or rank. A buffer with some number
     * of unspecified dimensions is represented by setting each unspecified dimension to 0. A
     * buffer with unspecified rank is represented by providing an empty dimensions vector.
     */
    Dimensions dimensions;
};

/**
 * Describes a role of an input or output to a prepared model.
 */
struct BufferRole {
    /**
     * The index of the IPreparedModel within the "preparedModel" argument passed in
     * IDevice::allocate.
     */
    uint32_t modelIndex = 0;

    /**
     * The index of the input or output operand.
     */
    uint32_t ioIndex = 0;

    /**
     * A floating-point value within the range (0.0, 1.0]. Describes how likely the
     * buffer is to be used in the specified role. This is provided as a hint to
     * optimize the case when multiple roles prefer different buffer locations or data
     * layouts.
     */
    float probability = 0.0f;
};

/**
 * Inputs to be sent to and outputs to be retrieved from a prepared model.
 *
 * A Request serves two primary tasks:
 * 1) Provides the input and output data to be used when executing the model.
 * 2) Specifies any updates to the input operand metadata that were left
 *    unspecified at model preparation time.
 *
 * An output must not overlap with any other output, with an input, or
 * with an operand of lifetime CONSTANT_REFERENCE.
 */
struct Request {
    /**
     * Metadata information specifying the location of the input or output data and
     * any updates to the input or output operand.
     */
    struct Argument {
        enum class LifeTime {
            /** Data lives in one of Request::pools (see DataLocation::poolIndex). */
            POOL = 0,
            /** The argument is an omitted optional value. */
            NO_VALUE = 1,
            /** Data is referenced directly via DataLocation::pointer. */
            POINTER = 2,
        };

        LifeTime lifetime{};

        /**
         * The location within one of the memory pools passed in the Request.
         */
        DataLocation location;

        /**
         * Updated dimension information.
         *
         * If dimensions.size() > 0, dimension information was provided
         * along with the argument. This can be the case for models that
         * accept inputs of varying size. This can't change the rank, just
         * the value of the dimensions that were unspecified in the
         * model. If dimensions.size() > 0, then all dimensions must be
         * specified here; and any dimension that was specified in the
         * model must have the same value here.
         *
         * If the dimensions in the model are not fully specified, then
         * they must be fully specified here, unless hasNoValue is set to
         * true. If the dimensions in the model are fully specified, then
         * either dimensions.size() may be 0, or the dimensions in the
         * model must be identical to the dimensions here.
         */
        Dimensions dimensions;
    };

    /**
     * Specifies a driver-managed buffer. It is the token corresponding to an
     * IBuffer returned from IDevice::allocate, and is specific to the IDevice
     * object.
     */
    enum class MemoryDomainToken : uint32_t {};

    /**
     * A memory pool.
     */
    using MemoryPool = std::variant<SharedMemory, MemoryDomainToken, SharedBuffer>;

    /**
     * Input data and information to be used in the execution of a prepared
     * model.
     *
     * The index of the input corresponds to the index in Model::main::inputIndexes.
     * E.g., inputs[i] corresponds to Model::main::inputIndexes[i].
     */
    std::vector<Argument> inputs;

    /**
     * Output data and information to be used in the execution of a prepared
     * model.
     *
     * The index of the output corresponds to the index in Model::main::outputIndexes.
     * E.g., outputs[i] corresponds to Model::main::outputIndexes[i].
     */
    std::vector<Argument> outputs;

    /**
     * A collection of memory pools containing operand data for both the
     * inputs and the outputs to a model.
     */
    std::vector<MemoryPool> pools;
};

// Representation of sync_fence.
class SyncFence {
   public:
    // Creates a fence that is already in the signaled state.
    static SyncFence createAsSignaled();
    static SyncFence create(base::unique_fd fd);
    static Result<SyncFence> create(SharedHandle syncFence);

    // The function syncWait() has the same semantics as the system function
    // ::sync_wait(), except that the syncWait() return value is semantically
    // richer.
    enum class FenceState {
        ACTIVE,    // fence has not been signaled
        SIGNALED,  // fence has been signaled
        ERROR,     // fence has been placed in the error state
        UNKNOWN,   // either bad argument passed to syncWait(), or internal error
    };
    using Timeout = std::chrono::duration<int, std::milli>;
    using OptionalTimeout = std::optional<Timeout>;

    // Blocks until the fence signals, errors, or (if a timeout is given) the
    // timeout elapses; see FenceState for possible outcomes.
    FenceState syncWait(OptionalTimeout optionalTimeout) const;

    SharedHandle getSharedHandle() const;
    bool hasFd() const;
    int getFd() const;

   private:
    explicit SyncFence(SharedHandle syncFence);

    SharedHandle mSyncFence;
};

// Clock used for all time points below (Android boot clock).
using Clock = base::boot_clock;

using Duration = std::chrono::nanoseconds;
using OptionalDuration = std::optional<Duration>;

using TimePoint = std::chrono::time_point<Clock, Duration>;
using OptionalTimePoint = std::optional<TimePoint>;

/**
 * Timing information measured during execution. Each time is a duration from
 * the beginning of some task to the end of that task, including time when that
 * task is not active (for example, preempted by some other task, or
 * waiting for some resource to become available).
 *
 * Times are measured in nanoseconds.
 */
struct Timing {
    /** Execution time on device (not driver, which runs on host processor). */
    OptionalDuration timeOnDevice;
    /** Execution time in driver (including time on device). */
    OptionalDuration timeInDriver;
};

// Returns status, timingLaunched, timingFenced
// NOTE(review): std::function requires <functional>, which is not among the
// includes visible above — presumably pulled in transitively; consider
// including it directly (include-what-you-use).
using ExecuteFencedInfoCallback = std::function<GeneralResult<std::pair<Timing, Timing>>()>;

// NNAPI version, named after the Android release that introduced it;
// CURRENT_RUNTIME denotes the version of the runtime itself.
enum class Version { ANDROID_OC_MR1, ANDROID_P, ANDROID_Q, ANDROID_R, ANDROID_S, CURRENT_RUNTIME };

// Describes the memory preference of an operand.
struct MemoryPreference {
    // Must be a power of 2.
    // For pointer buffers, the alignment is satisfied if the address of the pointer is a multiple
    // of the "alignment" value. For memory pools, the alignment is satisfied if the offset of the
    // sub-region specified by DataLocation is a multiple of the "alignment" value.
    uint32_t alignment;
    // Must be a power of 2.
    // For both pointer buffers and memory pools, the padding is satisfied if the padded length is
    // greater than or equal to the raw size of the operand (i.e. the size of an element multiplied
    // by the number of elements) rounding up to a multiple of the "padding" value. In DataLocation,
    // the padded length equals to the sum of the length and padding fields.
    uint32_t padding;
};

}  // namespace android::nn

#endif  // ANDROID_FRAMEWORKS_ML_NN_COMMON_NNAPI_TYPES_H