• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2020 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ANDROID_FRAMEWORKS_ML_NN_COMMON_NNAPI_TYPES_H
18 #define ANDROID_FRAMEWORKS_ML_NN_COMMON_NNAPI_TYPES_H
19 
20 #include <android-base/chrono_utils.h>
21 #include <android-base/expected.h>
22 #include <android-base/unique_fd.h>
23 
#include <array>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <limits>
#include <memory>
#include <optional>
#include <string>
#include <type_traits>
#include <utility>
#include <variant>
#include <vector>
34 
35 #include "nnapi/OperandTypes.h"
36 #include "nnapi/OperationTypes.h"
37 #include "nnapi/Result.h"
38 
// Forward declare AHardwareBuffer, so this header does not need to pull in the
// NDK hardware-buffer header; only pointers to AHardwareBuffer are used below.
extern "C" typedef struct AHardwareBuffer AHardwareBuffer;
41 
42 namespace android::nn {
43 
// Forward declarations.
//
// These interfaces/types are defined elsewhere; they are declared here so the
// Shared* aliases below can refer to them.

class IBuffer;
class IBurst;
class IDevice;
class IExecution;
class IPreparedModel;
struct Memory;
52 
// Constants

// Sentinel performance ratios used when a driver reports no performance
// information; see Capabilities::PerformanceInfo below (lower is better, so
// "max float" means "assume worst").
constexpr float kDefaultExecTime = std::numeric_limits<float>::max();
constexpr float kDefaultPowerUsage = std::numeric_limits<float>::max();
// Size in bytes of a compilation cache token; see CacheToken below.
constexpr uint32_t kByteSizeOfCacheToken = 32;
// Limit on the number of cache files a driver may request for compilation
// caching. (Exact usage is defined by the driver/device interfaces.)
constexpr uint32_t kMaxNumberOfCacheFiles = 32;

/**
 * Numeric values of extension operand and operation types have the
 * following structure:
 * - 16 high bits represent the "prefix", which corresponds uniquely to the
 *   extension name.
 * - 16 low bits represent the type ID within the extension.
 */
constexpr uint8_t kExtensionTypeBits = 16;
constexpr uint8_t kExtensionPrefixBits = 16;
// Mask selecting the low kExtensionTypeBits bits (the type ID within the
// extension); must stay consistent with kExtensionTypeBits above.
constexpr uint32_t kTypeWithinExtensionMask = 0xFFFF;

// Default alignment/padding for request memory; see MemoryPreference at the
// bottom of this header for how alignment and padding are interpreted.
constexpr uint32_t kDefaultRequestMemoryAlignment = 64;
constexpr uint32_t kDefaultRequestMemoryPadding = 64;
// Smallest permitted alignment/padding values (both must be powers of 2).
constexpr uint32_t kMinMemoryAlignment = alignof(std::max_align_t);
constexpr uint32_t kMinMemoryPadding = 1;
// Default and maximum loop timeouts (presumably bounding execution of
// OperationType::WHILE loops -- confirm against the runtime documentation).
constexpr auto kLoopTimeoutDefault = std::chrono::seconds{2};
constexpr auto kLoopTimeoutMaximum = std::chrono::seconds{15};
77 
// Aliases

// Shared-ownership handles to the canonical NNAPI objects declared above.
// All point to const: the referenced objects are immutable through these
// aliases and may safely be shared across threads of ownership.

using SharedBuffer = std::shared_ptr<const IBuffer>;
using SharedBurst = std::shared_ptr<const IBurst>;
using SharedDevice = std::shared_ptr<const IDevice>;
using SharedExecution = std::shared_ptr<const IExecution>;
using SharedMemory = std::shared_ptr<const Memory>;
using SharedPreparedModel = std::shared_ptr<const IPreparedModel>;
86 
87 // Canonical types
88 
/**
 * Status of a device.
 */
enum class DeviceStatus {
    /** The device is available for use. */
    AVAILABLE = 0,
    /** The device is busy. */
    BUSY = 1,
    /** The device is offline. */
    OFFLINE = 2,
    /** The device status could not be determined. */
    UNKNOWN = 3,
};
98 
/**
 * Execution preferences.
 *
 * A hint from the application about how the model will be used, allowing the
 * driver to trade off power against latency or throughput.
 */
enum class ExecutionPreference {
    /**
     * Prefer executing in a way that minimizes battery drain.
     * This is desirable for compilations that will be executed often.
     */
    LOW_POWER = 0,
    /**
     * Prefer returning a single answer as fast as possible, even if this causes
     * more power consumption.
     */
    FAST_SINGLE_ANSWER = 1,
    /**
     * Prefer maximizing the throughput of successive frames, for example when
     * processing successive frames coming from the camera.
     */
    SUSTAINED_SPEED = 2,
    DEFAULT = FAST_SINGLE_ANSWER,
};
120 
/**
 * Device types.
 *
 * The type of NNAPI device.
 */
enum class DeviceType {
    /** The device type cannot be provided. */
    UNKNOWN = 0,
    /** The device does not fall into any category below. */
    OTHER = 1,
    /** The device runs NNAPI models on single or multi-core CPU. */
    CPU = 2,
    /** The device can run NNAPI models and also accelerate graphics APIs such
     * as OpenGL ES and Vulkan. */
    GPU = 3,
    /** Dedicated accelerator for Machine Learning workloads. */
    ACCELERATOR = 4,
};
139 
/**
 * Specifies whether or not to measure timing information during execution.
 *
 * When YES, the driver reports durations via the Timing struct below.
 */
enum class MeasureTiming {
    NO = 0,
    YES = 1,
};
147 
/**
 * Priority given to a prepared model for execution.
 */
enum class Priority {
    LOW = 0,
    MEDIUM = 1,
    HIGH = 2,
    /** Used when the application does not specify a priority. */
    DEFAULT = MEDIUM,
};
157 
// TODO: Should more errors from NeuralNetworks.h be incorporated? The left name shows errors that
// appear in NeuralNetworks.h but not in the HAL, and the right column shows what these values could
// map to:
// * OUT_OF_MEMORY ==> GENERAL_FAILURE / RESOURCE_EXHAUSTED_*
// * INCOMPLETE ==> GENERAL_FAILURE
// * UNEXPECTED_NULL ==> INVALID_ARGUMENT
// * UNMAPPABLE ==> GENERAL_FAILURE
// * BAD_STATE ==> INVALID_ARGUMENT
enum class ErrorStatus {
    NONE = 0,
    DEVICE_UNAVAILABLE = 1,
    GENERAL_FAILURE = 2,
    OUTPUT_INSUFFICIENT_SIZE = 3,
    INVALID_ARGUMENT = 4,
    MISSED_DEADLINE_TRANSIENT = 5,
    MISSED_DEADLINE_PERSISTENT = 6,
    RESOURCE_EXHAUSTED_TRANSIENT = 7,
    RESOURCE_EXHAUSTED_PERSISTENT = 8,
    // Deliberately far outside the contiguous range above (presumably so it can
    // never collide with a status code defined by the HAL -- confirm).
    DEAD_OBJECT = 10000,
};
178 
/**
 * Error type returned by non-execution NNAPI calls: a status code plus a
 * human-readable message for logging/diagnostics.
 */
struct GeneralError {
    std::string message;
    ErrorStatus code = ErrorStatus::GENERAL_FAILURE;
};

// Either a Type (success) or a GeneralError (failure).
template <typename Type>
using GeneralResult = base::expected<Type, GeneralError>;
186 
/**
 * Fused activation function types.
 */
enum class FusedActivationFunc : int32_t {
    /** NO fused activation function. */
    NONE = 0,
    /** Fused ReLU activation function. */
    RELU = 1,
    /** Fused ReLU1 activation function. */
    RELU1 = 2,
    /** Fused ReLU6 activation function. */
    RELU6 = 3,
};
200 
// A single operand dimension. The value 0 denotes "unspecified"; see
// Operand::dimensions below for the full dimension-specification rules.
using Dimension = uint32_t;
using Dimensions = std::vector<Dimension>;

// Opaque token identifying an entry in the compilation cache.
using CacheToken = std::array<uint8_t, kByteSizeOfCacheToken>;
205 
/**
 * Describes the shape information of an output operand after execution.
 */
struct OutputShape {
    /**
     * Dimensions of the operand.
     */
    std::vector<uint32_t> dimensions;

    /**
     * Whether the provided buffer size is sufficient for the output.
     */
    bool isSufficient = false;
};
220 
/**
 * Error type returned by execution calls. In addition to the status code and
 * message, it can carry the actual output shapes when the failure was caused
 * by undersized output buffers.
 */
struct ExecutionError {
    std::string message;
    ErrorStatus code = ErrorStatus::GENERAL_FAILURE;
    // OutputShapes for code == OUTPUT_INSUFFICIENT_SIZE
    std::vector<OutputShape> outputShapes = {};
};

// Either a Type (success) or an ExecutionError (failure).
template <typename Type>
using ExecutionResult = base::expected<Type, ExecutionError>;
230 
/**
 * The capabilities of a driver.
 *
 * This represents performance of non-extension operations.
 *
 * Performance of an operation other than {@link OperationType::IF} and
 * {@link OperationType::WHILE} comes from the type of its first operand.
 */
struct Capabilities {
    /**
     * Performance information for the reference workload.
     *
     * Used by a driver to report its performance characteristics.
     */
    struct PerformanceInfo {
        /**
         * Ratio of the time taken by the driver to execute the
         * workload compared to the time the CPU would take for the
         * same workload. A lower number is better.
         */
        float execTime = kDefaultExecTime;

        /**
         * Ratio of the energy used by the driver compared to what
         * the CPU would use for doing the same workload. A lower number
         * is better.
         */
        float powerUsage = kDefaultPowerUsage;
    };

    /**
     * Driver performance when operating on a particular data type.
     * In the case of float32 data, this is used when the calculations
     * are not relaxed.
     */
    struct OperandPerformance {
        OperandType type{};
        PerformanceInfo info;
    };

    /**
     * Validated table of per-operand-type performance entries.
     */
    class OperandPerformanceTable {
       public:
        // Validates and takes ownership of the entries, returning an error
        // Result for invalid input (the member name mSorted suggests entries
        // are kept sorted by OperandType -- confirm in the implementation).
        static Result<OperandPerformanceTable> create(
                std::vector<OperandPerformance> operandPerformances);

        // Returns the performance recorded for `type`. Per the contract of
        // operandPerformance below, an absent type is treated as
        // { .execTime = FLT_MAX, .powerUsage = FLT_MAX }.
        PerformanceInfo lookup(OperandType type) const;
        // Read-only view of the underlying entries.
        const std::vector<OperandPerformance>& asVector() const;

       private:
        explicit OperandPerformanceTable(std::vector<OperandPerformance> operandPerformances);
        std::vector<OperandPerformance> mSorted;
    };

    /**
     * Driver performance when operating on float32 data but performing
     * calculations with range and/or precision as low as that of the IEEE
     * 754 16-bit floating-point format.
     *
     * The "...Scalar" member applies to scalar operands and the "...Tensor"
     * member to tensor operands.
     */
    PerformanceInfo relaxedFloat32toFloat16PerformanceScalar;
    PerformanceInfo relaxedFloat32toFloat16PerformanceTensor;

    /**
     * Performance by operand type. Must be sorted by OperandType.
     *
     * If a particular {@link OperandType} is not present in operandPerformance,
     * its performance is treated as
     * { .execTime = FLT_MAX, .powerUsage = FLT_MAX }.
     *
     * Performance does not apply to {@link OperandType::SUBGRAPH}, and a driver
     * must not report operand performance for {@link OperandType::SUBGRAPH}.
     */
    OperandPerformanceTable operandPerformance;

    /**
     * Performance of an {@link OperationType::IF} operation is the sum of
     * {@link Capabilities::ifPerformance} and the mean of performance for the
     * two branch subgraphs, where performance for a subgraph is the sum of the
     * performance of all operations within the subgraph.
     */
    PerformanceInfo ifPerformance;

    /**
     * Performance of a {@link OperationType::WHILE} operation is the sum of
     * {@link Capabilities::whilePerformance}, performance for the condition
     * subgraph and performance for the body subgraph, where performance for a
     * subgraph is the sum of the performance of all operations within the
     * subgraph.
     */
    PerformanceInfo whilePerformance;
};
321 
/**
 * Information about an extension.
 */
struct Extension {
    /**
     * Information about an extension operand type.
     */
    struct OperandTypeInformation {
        /**
         * The extension operand type (the low-bits type ID within the
         * extension; see kExtensionTypeBits).
         */
        uint16_t type = 0;

        /**
         * Indicates whether the extension operand type represents a tensor or
         * a scalar.
         */
        bool isTensor = false;

        /**
         * The byte size of the operand (if scalar) or of a single element (if
         * tensor).
         */
        uint32_t byteSize = 0;
    };

    /**
     * The extension name.
     *
     * The name must consist of lowercase latin letters, numbers, periods, and
     * underscore signs. The name must contain at least one period.
     *
     * The name must start with the reverse domain name of the vendor.
     *
     * Example: com.google.test_extension
     */
    std::string name;

    /**
     * Information about operand types defined by the extension.
     */
    std::vector<OperandTypeInformation> operandTypes;
};
365 
/**
 * Describes one operation of the model's graph.
 */
struct Operation {
    /**
     * The operation type.
     */
    OperationType type{};

    /**
     * Describes the table that contains the indexes of the inputs of the
     * operation. The offset is the index in the operandIndexes table.
     * (NOTE(review): "operandIndexes" is legacy HAL wording; each value
     * presumably indexes into the enclosing Subgraph::operands -- confirm.)
     */
    std::vector<uint32_t> inputs;

    /**
     * Describes the table that contains the indexes of the outputs of the
     * operation. The offset is the index in the operandIndexes table.
     * (Same indexing convention as `inputs` above.)
     */
    std::vector<uint32_t> outputs;
};
387 
/**
 * Describes the location of a data object.
 *
 * Which fields are meaningful depends on the owning operand's lifetime; see
 * the documentation of Operand::location below for the per-lifetime contract.
 */
struct DataLocation {
    /**
     * The address of the memory where the data is found.
     *
     * This field is only active when lifetime is POINTER.
     */
    std::variant<const void*, void*> pointer;

    /**
     * The index of the memory pool where this location is found.
     */
    uint32_t poolIndex = 0;

    /**
     * Offset in bytes from the start of the pool.
     */
    uint32_t offset = 0;

    /**
     * The length of the data in bytes.
     */
    uint32_t length = 0;

    /**
     * The end padding of the specified memory region in bytes.
     */
    uint32_t padding = 0;
};
419 
/**
 * Describes one operand of the model's graph.
 */
struct Operand {
    /**
     * How an operand is used.
     */
    enum class LifeTime {
        /**
         * The operand is internal to the model. It's created by an operation and
         * consumed by other operations. It must be an output operand of
         * exactly one operation.
         */
        TEMPORARY_VARIABLE = 0,

        /**
         * The operand is an input of a subgraph. It must not be an output
         * operand of any operation.
         *
         * An operand can't be both input and output of a subgraph.
         */
        SUBGRAPH_INPUT = 1,

        /**
         * The operand is an output of a subgraph. It must be an output
         * operand of exactly one operation.
         *
         * An operand can't be both input and output of a subgraph.
         */
        SUBGRAPH_OUTPUT = 2,

        /**
         * The operand is a constant found in Model::operandValues. It must
         * not be an output operand of any operation.
         */
        CONSTANT_COPY = 3,

        /**
         * The operand is a constant that was specified via a Memory
         * object. It must not be an output operand of any operation.
         */
        CONSTANT_REFERENCE = 4,

        /**
         * The operand does not have a value. This is valid only for optional
         * arguments of operations.
         */
        NO_VALUE = 5,

        /**
         * The operand is a reference to a subgraph. It must be an input to one
         * or more {@link OperationType::IF} or {@link OperationType::WHILE}
         * operations.
         */
        SUBGRAPH = 6,

        /**
         * This operand is a constant found in a user buffer. It must not be an
         * output operand of any operation.
         */
        POINTER = 7,
    };

    /**
     * No additional parameters.
     */
    using NoParams = std::monostate;

    /**
     * Parameters for TENSOR_QUANT8_SYMM_PER_CHANNEL operand.
     */
    struct SymmPerChannelQuantParams {
        /** Array of scaling values for each channel. Each value must be greater than zero. */
        std::vector<float> scales;
        /** Index of the channel dimension */
        uint32_t channelDim = 0;
    };

    /**
     * Extension operand parameters.
     *
     * The framework treats this as an opaque data blob.
     * The format is up to individual extensions.
     */
    using ExtensionParams = std::vector<uint8_t>;

    /**
     * Additional parameters specific to a particular operand type.
     */
    using ExtraParams = std::variant<NoParams, SymmPerChannelQuantParams, ExtensionParams>;

    /**
     * The data type.
     *
     * Besides the values listed in {@link OperandType}, any value equal or over
     * (1 << kExtensionTypeBits) is possible and should be interpreted
     * as an extension type according to {@link Model::extensionNameToPrefix}.
     */
    OperandType type{};

    /**
     * Dimensions of the operand.
     *
     * For a scalar operand, dimensions.size() must be 0.
     *
     * A tensor operand with all dimensions specified has "fully
     * specified" dimensions. Whenever possible (i.e., whenever the
     * dimensions are known at model construction time), a tensor
     * operand should have (but is not required to have) fully
     * specified dimensions, in order to enable the best possible
     * performance.
     *
     * If a tensor operand's dimensions are not fully specified, the
     * dimensions of the operand are deduced from the operand
     * dimensions and values of the operation for which that operand
     * is an output or from the corresponding {@link OperationType::IF} or
     * {@link OperationType::WHILE} operation input operand dimensions in the
     * case of referenced subgraph input operands.
     *
     * In the following situations, a tensor operand's dimensions must
     * be fully specified:
     *
     *     - The operand has lifetime CONSTANT_COPY, CONSTANT_REFERENCE, or
     *       POINTER.
     *
     *     - The operand has lifetime SUBGRAPH_INPUT and belongs to the main
     *       subgraph. Fully specified dimensions must either be present in the
     *       Operand or they must be provided in the corresponding
     *       RequestArgument.
     *       EXCEPTION: If the input is optional and omitted
     *       (by setting the hasNoValue field of the corresponding
     *       RequestArgument to true) then it need not have fully
     *       specified dimensions.
     *
     * A tensor operand with some number of unspecified dimensions is
     * represented by setting each unspecified dimension to 0.
     *
     * A tensor operand with unspecified rank is represented by providing
     * an empty dimensions vector.
     */
    Dimensions dimensions;

    /**
     * Quantized scale of the operand.
     *
     * Must be 0 when not applicable to an operand type.
     *
     * See {@link OperandType}.
     */
    float scale = 0.0f;

    /**
     * Quantized zero-point offset of the operand.
     *
     * Must be 0 when not applicable to an operand type.
     *
     * See {@link OperandType}.
     */
    int32_t zeroPoint = 0;

    /**
     * How the operand is used.
     */
    LifeTime lifetime{};

    /**
     * Where to find the data for this operand.
     * If the lifetime is TEMPORARY_VARIABLE, SUBGRAPH_INPUT, SUBGRAPH_OUTPUT,
     * or NO_VALUE:
     * - All the fields must be 0.
     * If the lifetime is CONSTANT_COPY:
     * - location.pointer is null.
     * - location.poolIndex is 0.
     * - location.offset is the offset in bytes into Model::operandValues.
     * - location.length is set.
     * - location.padding is 0.
     * If the lifetime is CONSTANT_REFERENCE:
     * - location.pointer is null.
     * - location.poolIndex is set.
     * - location.offset is the offset in bytes into the specified pool.
     * - location.length is set.
     * - location.padding is set.
     * If the lifetime is SUBGRAPH:
     * - location.pointer is null.
     * - location.poolIndex is 0.
     * - location.offset is the index of the referenced subgraph in
     *   {@link Model::referenced}.
     * - location.length is 0.
     * - location.padding is 0.
     * If the lifetime is POINTER:
     * - location.pointer is non-null.
     * - location.poolIndex is 0.
     * - location.offset is 0.
     * - location.length is set.
     * - location.padding is 0.
     */
    DataLocation location;

    /**
     * Additional parameters specific to a particular operand type.
     */
    ExtraParams extraParams;
};
623 
/**
 * An owned collection of file descriptors plus plain integers (the same data
 * a native handle carries -- the fds are closed automatically via unique_fd).
 */
struct Handle {
    std::vector<base::unique_fd> fds;
    std::vector<int> ints;
};

using SharedHandle = std::shared_ptr<const Handle>;
630 
/**
 * A memory pool, represented as one of several concrete backing kinds.
 */
struct Memory {
    // Anonymous-shared-memory (ashmem) region.
    struct Ashmem {
        base::unique_fd fd;
        size_t size;  // size of the region in bytes
    };

    // A region backed by a plain file descriptor. `prot` and `offset` mirror
    // mmap-style protection flags and mapping offset (presumably passed
    // through to mmap -- confirm in the implementation).
    struct Fd {
        size_t size;
        int prot;
        base::unique_fd fd;
        size_t offset;
    };

    // RAII wrapper for AHardwareBuffer
    struct HardwareBuffer {
        using Deleter = std::add_pointer_t<void(AHardwareBuffer*)>;
        using Handle = std::unique_ptr<AHardwareBuffer, Deleter>;
        Handle handle;
    };

    // A memory object of a kind not known to this layer, identified by name.
    // Note: `Handle` here is the file-level android::nn::Handle (fds + ints),
    // not HardwareBuffer::Handle.
    struct Unknown {
        Handle handle;
        size_t size;
        std::string name;
    };

    std::variant<Ashmem, Fd, HardwareBuffer, Unknown> handle;
};
659 
/**
 * A Neural Network Model.
 *
 * This includes not only the execution graph, but also constant data such as
 * weights or scalars added at construction time. The only information that
 * may not be known is the shape of the input tensors.
 */
struct Model {
    /**
     * An excerpt of the execution graph.
     */
    struct Subgraph {
        /**
         * All operands included in the subgraph.
         */
        std::vector<Operand> operands;

        /**
         * All operations included in the subgraph.
         *
         * The operations are sorted into execution order. Every operand
         * with lifetime SUBGRAPH_OUTPUT or TEMPORARY_VARIABLE must be
         * written before it is read.
         */
        std::vector<Operation> operations;

        /**
         * Input indexes of the subgraph. There must be at least one.
         *
         * Each value corresponds to the index of the operand in "operands".
         */
        std::vector<uint32_t> inputIndexes;

        /**
         * Output indexes of the subgraph. There must be at least one.
         *
         * Each value corresponds to the index of the operand in "operands".
         */
        std::vector<uint32_t> outputIndexes;
    };

    /**
     * Byte buffer holding the values of CONSTANT_COPY operands, with
     * alignment-aware append semantics.
     */
    class OperandValues {
       public:
        // Constructs an empty buffer.
        OperandValues();
        // Constructs a buffer initialized with a copy of [data, data + length).
        OperandValues(const uint8_t* data, size_t length);

        // Append a segment of memory (starting at `data` with `length` number of bytes) to the back
        // of `OperandValues`, adding padding as necessary so that the appended data is aligned.
        // Refer to `getAlignmentForLength` for more information on alignment (such as what the
        // current alignments are for different data lengths).
        DataLocation append(const uint8_t* data, size_t length);

        const uint8_t* data() const;
        size_t size() const;

       private:
        std::vector<uint8_t> mData;
    };

    /**
     * A correspondence between an extension name and a prefix of operand and
     * operation type values.
     */
    struct ExtensionNameAndPrefix {
        /**
         * The extension name.
         *
         * See {@link Extension::name} for the format specification.
         */
        std::string name;

        /**
         * The unique extension identifier within the model.
         *
         * See {@link Model::extensionNameToPrefix}.
         */
        uint16_t prefix = 0;
    };

    /**
     * The top-level subgraph.
     */
    Subgraph main;

    /**
     * Referenced subgraphs.
     *
     * Each subgraph is referenced by the main subgraph or at least one other
     * referenced subgraph.
     *
     * There must be no reference cycles.
     */
    std::vector<Subgraph> referenced;

    /**
     * A byte buffer containing operand data that were copied into the model.
     *
     * An operand's value must be located here if and only if Operand::lifetime
     * equals Operand::LifeTime::CONSTANT_COPY.
     */
    OperandValues operandValues;

    /**
     * A collection of shared memory pools containing operand values.
     *
     * An operand's value must be located here if and only if Operand::lifetime
     * equals Operand::LifeTime::CONSTANT_REFERENCE.
     */
    std::vector<SharedMemory> pools;

    /**
     * 'true' indicates TENSOR_FLOAT32 may be calculated with range and/or
     * precision as low as that of the IEEE 754 16-bit floating-point format.
     * 'false' indicates TENSOR_FLOAT32 must be calculated using at least the
     * range and precision of the IEEE 754 32-bit floating-point format.
     */
    bool relaxComputationFloat32toFloat16 = false;

    /**
     * The mapping between extension names and prefixes of operand and
     * operation type values.
     *
     * An operand or operation whose numeric type value is equal to or greater
     * than (1 << kExtensionTypeBits) should be interpreted
     * as an extension operand. The low
     * {@link kExtensionTypeBits} bits of the value correspond to the type ID
     * within the extension and the high {@link kExtensionPrefixBits} bits encode
     * the "prefix", which maps uniquely to the extension name.
     *
     * For example, if a model contains an operation whose value is
     * 0xAAAABBBB and extensionNameToPrefix contains an entry with
     * prefix=0xAAAA and name="vendor.test.test_extension", then
     * the operation should be interpreted as the operation 0xBBBB
     * of the extension named vendor.test.test_extension.
     *
     * This is a one-to-one correspondence. That is, there must be at most one
     * prefix corresponding to each extension name and at most one extension
     * name corresponding to each prefix.
     */
    std::vector<ExtensionNameAndPrefix> extensionNameToPrefix;
};
801 
/**
 * A buffer descriptor. Describes the properties of a buffer.
 */
struct BufferDesc {
    /**
     * Dimensions of the buffer. May have unknown dimensions or rank. A buffer with some number
     * of unspecified dimensions is represented by setting each unspecified dimension to 0. A
     * buffer with unspecified rank is represented by providing an empty dimensions vector.
     */
    Dimensions dimensions;
};
813 
/**
 * Describes a role of an input or output to a prepared model.
 */
struct BufferRole {
    /**
     * The index of the IPreparedModel within the "preparedModel" argument passed in
     * IDevice::allocate.
     */
    uint32_t modelIndex = 0;

    /**
     * The index of the input or output operand.
     */
    uint32_t ioIndex = 0;

    /**
     * A floating-point value within the range (0.0, 1.0]. Describes how likely the
     * buffer is to be used in the specified role. This is provided as a hint to
     * optimize the case when multiple roles prefer different buffer locations or data
     * layouts.
     *
     * NOTE(review): the default 0.0f lies outside the documented (0.0, 1.0]
     * range, so a default-constructed BufferRole is not valid as-is -- callers
     * are expected to set this field; confirm against validation code.
     */
    float probability = 0.0f;
};
837 
/**
 * Inputs to be sent to and outputs to be retrieved from a prepared model.
 *
 * A Request serves two primary tasks:
 * 1) Provides the input and output data to be used when executing the model.
 * 2) Specifies any updates to the input operand metadata that were left
 *    unspecified at model preparation time.
 *
 * An output must not overlap with any other output, with an input, or
 * with an operand of lifetime CONSTANT_REFERENCE.
 */
struct Request {
    /**
     * Metadata information specifying the location of the input or output data and
     * any updates to the input or output operand.
     */
    struct Argument {
        enum class LifeTime {
            /** The data lives in one of the memory pools in Request::pools. */
            POOL = 0,
            /** The (optional) argument is omitted and carries no data. */
            NO_VALUE = 1,
            /** The data lives in a caller-owned buffer addressed by
             * location.pointer. */
            POINTER = 2,
        };

        LifeTime lifetime{};

        /**
         * The location within one of the memory pools passed in the Request.
         */
        DataLocation location;

        /**
         * Updated dimension information.
         *
         * If dimensions.size() > 0, dimension information was provided
         * along with the argument. This can be the case for models that
         * accept inputs of varying size. This can't change the rank, just
         * the value of the dimensions that were unspecified in the
         * model. If dimensions.size() > 0, then all dimensions must be
         * specified here; and any dimension that was specified in the
         * model must have the same value here.
         *
         * If the dimensions in the model are not fully specified, then
         * they must be fully specified here, unless hasNoValue is set to
         * true. If the dimensions in the model are fully specified, then
         * either dimensions.size() may be 0, or the dimensions in the
         * model must be identical to the dimensions here.
         */
        Dimensions dimensions;
    };

    /**
     * Specifies a driver-managed buffer. It is the token corresponding to an
     * IBuffer returned from IDevice::allocate, and is specific to the IDevice
     * object.
     */
    enum class MemoryDomainToken : uint32_t {};

    /**
     * A memory pool.
     */
    using MemoryPool = std::variant<SharedMemory, MemoryDomainToken, SharedBuffer>;

    /**
     * Input data and information to be used in the execution of a prepared
     * model.
     *
     * The index of the input corresponds to the index in Model::main::inputIndexes.
     *   E.g., inputs[i] corresponds to Model::main::inputIndexes[i].
     */
    std::vector<Argument> inputs;

    /**
     * Output data and information to be used in the execution of a prepared
     * model.
     *
     * The index of the output corresponds to the index in Model::main::outputIndexes.
     *   E.g., outputs[i] corresponds to Model::main::outputIndexes[i].
     */
    std::vector<Argument> outputs;

    /**
     * A collection of memory pools containing operand data for both the
     * inputs and the outputs to a model.
     */
    std::vector<MemoryPool> pools;
};
924 
// Representation of sync_fence.
class SyncFence {
   public:
    // Creates a fence that is already in the signaled state (no fd backing).
    static SyncFence createAsSignaled();
    // Wraps an existing sync-fence file descriptor, taking ownership of it.
    static SyncFence create(base::unique_fd fd);
    // Wraps a SharedHandle; fails (error Result) if the handle is not a valid
    // sync fence.
    static Result<SyncFence> create(SharedHandle syncFence);

    // The function syncWait() has the same semantics as the system function
    // ::sync_wait(), except that the syncWait() return value is semantically
    // richer.
    enum class FenceState {
        ACTIVE,    // fence has not been signaled
        SIGNALED,  // fence has been signaled
        ERROR,     // fence has been placed in the error state
        UNKNOWN,   // either bad argument passed to syncWait(), or internal error
    };
    using Timeout = std::chrono::duration<int, std::milli>;
    using OptionalTimeout = std::optional<Timeout>;

    // Blocks until the fence settles or the timeout expires; std::nullopt
    // presumably means "wait forever" (matching ::sync_wait's -1) -- confirm.
    FenceState syncWait(OptionalTimeout optionalTimeout) const;

    SharedHandle getSharedHandle() const;
    bool hasFd() const;
    int getFd() const;

   private:
    explicit SyncFence(SharedHandle syncFence);

    SharedHandle mSyncFence;
};
955 
// Clock used for all NNAPI time points (android-base boot_clock, which keeps
// counting across device suspend).
using Clock = base::boot_clock;

// All durations are expressed in nanoseconds.
using Duration = std::chrono::nanoseconds;
using OptionalDuration = std::optional<Duration>;

using TimePoint = std::chrono::time_point<Clock, Duration>;
using OptionalTimePoint = std::optional<TimePoint>;
963 
/**
 * Timing information measured during execution. Each time is a duration from
 * the beginning of some task to the end of that task, including time when that
 * task is not active (for example, preempted by some other task, or
 * waiting for some resource to become available).
 *
 * Times are measured in nanoseconds.
 */
struct Timing {
    /** Execution time on device (not driver, which runs on host processor). */
    OptionalDuration timeOnDevice;
    /** Execution time in driver (including time on device). */
    OptionalDuration timeInDriver;
};
978 
// Callback yielding {timingLaunched, timingFenced} for a fenced execution, or
// a GeneralError on failure ("status" is conveyed by the GeneralResult).
using ExecuteFencedInfoCallback = std::function<GeneralResult<std::pair<Timing, Timing>>()>;

// NNAPI feature levels, ordered from oldest Android release to the current
// runtime; enumerator order is meaningful for comparisons.
enum class Version { ANDROID_OC_MR1, ANDROID_P, ANDROID_Q, ANDROID_R, ANDROID_S, CURRENT_RUNTIME };
983 
// Describes the memory preference of an operand.
struct MemoryPreference {
    // Must be a power of 2.
    // For pointer buffers, the alignment is satisfied if the address of the pointer is a multiple
    // of the "alignment" value. For memory pools, the alignment is satisfied if the offset of the
    // sub-region specified by DataLocation is a multiple of the "alignment" value.
    uint32_t alignment;
    // Must be a power of 2.
    // For both pointer buffers and memory pools, the padding is satisfied if the padded length is
    // greater than or equal to the raw size of the operand (i.e. the size of an element multiplied
    // by the number of elements) rounding up to a multiple of the "padding" value. In DataLocation,
    // the padded length equals to the sum of the length and padding fields.
    uint32_t padding;
};
998 
999 }  // namespace android::nn
1000 
1001 #endif  // ANDROID_FRAMEWORKS_ML_NN_COMMON_NNAPI_TYPES_H
1002