// Copyright 2020 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// WARNING: Until b/191428000 is fixed you need to manually generate and update
// the generated flatbuffer code when modifying this file. See BUILD for more
// information.

// This schema defines how to configure TFLite for delegation. These
// definitions can be used in multiple ways: as output of a compatibility list,
// in benchmarking tools and to decouple delegate instantiation from code.
//
// The schema is work-in-progress, covering the most broadly used delegates and
// options.

syntax = "proto2";

package tflite.proto;

// ExecutionPreference is used to match accelerators against the preferences of
// the current application or use case. Some of the values here can appear both
// in the compatibility list and as input, some only as input.
//
// These are separate from NNAPIExecutionPreference - the compatibility list
// design doesn't assume a one-to-one mapping between which use cases
// compatibility list entries have been developed for and what settings are
// used for NNAPI.
enum ExecutionPreference {
  // Match any selected preference. Allowlist (semantically - value is same as
  // on input).
  ANY = 0;
  // Match low latency preference. Both compatibility list and input.
  LOW_LATENCY = 1;
  // Match low power preference. Both compatibility list and input.
  LOW_POWER = 2;
  // Never accelerate. Can be used for input to compatibility list or for
  // standalone Acceleration configuration.
  FORCE_CPU = 3;
}

// TFLite accelerator to use.
enum Delegate {
  NONE = 0;

  NNAPI = 1;
  GPU = 2;
  HEXAGON = 3;
  XNNPACK = 4;
  // The EdgeTpu in Pixel devices.
  EDGETPU = 5;
  // The Coral EdgeTpu Dev Board / USB accelerator.
  EDGETPU_CORAL = 6;
  // Apple CoreML.
  CORE_ML = 7;
}

enum NNAPIExecutionPreference {
  // Undefined.
  UNDEFINED = 0;
  // Prefer executing in a way that minimizes battery drain.
  NNAPI_LOW_POWER = 1;
  // Prefer returning a single answer as fast as possible, even if this causes
  // more power consumption.
  NNAPI_FAST_SINGLE_ANSWER = 2;
  // Prefer maximizing the throughput of successive frames, for example when
  // processing successive frames coming from the camera.
  NNAPI_SUSTAINED_SPEED = 3;
}

enum NNAPIExecutionPriority {
  NNAPI_PRIORITY_UNDEFINED = 0;
  NNAPI_PRIORITY_LOW = 1;
  NNAPI_PRIORITY_MEDIUM = 2;
  NNAPI_PRIORITY_HIGH = 3;
}

// One possible acceleration configuration.
message ComputeSettings {
  // Which preference to use this accelerator for.
  optional ExecutionPreference preference = 1;
  // How to configure TFLite.
  optional TFLiteSettings tflite_settings = 2;
  // Identifiers to use for instrumentation and telemetry.
  optional string model_namespace_for_statistics = 3;
  optional string model_identifier_for_statistics = 4;

  // 'Maybe' acceleration: use mini-benchmark to select settings.
  optional MinibenchmarkSettings settings_to_test_locally = 5;
}
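
// For illustration, a minimal textproto sketch of a ComputeSettings message
// (field names come from this schema; the string values are hypothetical):
//
//   preference: LOW_LATENCY
//   tflite_settings { delegate: GPU }
//   model_namespace_for_statistics: "example_app"     # hypothetical
//   model_identifier_for_statistics: "mobilenet_v2"   # hypothetical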

// NNAPI delegate settings.
message NNAPISettings {
  // Which instance (NNAPI accelerator) to use. One driver may provide several
  // accelerators (though a driver may also hide several back-ends behind one
  // name, at the choice of the driver vendor).
  // Note that driver introspection is only available in Android Q and later.
  optional string accelerator_name = 1;

  // NNAPI model compilation caching settings to be passed to
  // tflite::StatefulNnApiDelegate.
  optional string cache_directory = 2;
  optional string model_token = 3;

  // NNAPI execution preference to pass. See
  // https://developer.android.com/ndk/reference/group/neural-networks.html
  optional NNAPIExecutionPreference execution_preference = 4;

  // Number of instances to cache for the same model (for input size
  // changes). This is mandatory for getting reasonable performance in that
  // case.
  optional int32 no_of_nnapi_instances_to_cache = 5;

  // Deprecated; use the fallback_settings in TFLiteSettings.
  //
  // Whether to automatically fall back to TFLite CPU path.
  optional FallbackSettings fallback_settings = 6 [deprecated = true];

  // Whether to allow use of NNAPI CPU (nnapi-reference accelerator) on Android
  // 10+ when an accelerator name is not specified. The NNAPI CPU typically
  // performs less well than the TfLite built-in kernels, but allowing it lets
  // a model be partially accelerated, which may be a win.
  optional bool allow_nnapi_cpu_on_android_10_plus = 7;

  optional NNAPIExecutionPriority execution_priority = 8;

  // Whether to allow dynamic dimension sizes without re-compilation.
  // A tensor with a dynamic dimension must have a valid dims_signature
  // defined.
  // Only supported in NNAPI 1.1 and newer versions.
  // WARNING: Setting this flag to true may result in the model being rejected
  // by the accelerator. This should only be enabled if the target device
  // supports dynamic dimensions of the model.
  // By default this is set to false.
  optional bool allow_dynamic_dimensions = 9;

  // Whether to allow the NNAPI accelerator to optionally use lower-precision
  // float16 (16-bit floating point) arithmetic when doing calculations on
  // float32 (32-bit floating point).
  optional bool allow_fp16_precision_for_fp32 = 10;

  // Whether to use NNAPI Burst mode.
  // Burst mode allows accelerators to efficiently manage resources, which
  // significantly reduces overhead, especially if the same delegate instance
  // is used for multiple inferences.
  optional bool use_burst_computation = 11;

  // Optional handle to an NnApiSLDriverImplFL5 provided by the NNAPI Support
  // Library, which can be used to construct the NNAPI delegate.
  optional int64 support_library_handle = 12;
}
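
// For illustration, a textproto sketch of NNAPISettings that enables
// compilation caching; the accelerator name, path and token are hypothetical:
//
//   accelerator_name: "example-dsp"                   # hypothetical
//   cache_directory: "/data/data/com.example/cache"   # hypothetical
//   model_token: "mobilenet_v2_1.0_224"               # hypothetical
//   execution_preference: NNAPI_FAST_SINGLE_ANSWER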

// Which GPU backend to select. Default behaviour on Android is to try OpenCL
// and, if it's not available, fall back to OpenGL.
enum GPUBackend {
  UNSET = 0;
  OPENCL = 1;
  OPENGL = 2;
  // Not yet supported.
  // VULKAN = 3;
  // METAL = 4;
}

// GPU inference priorities define relative priorities given by the GPU
// delegate to different client needs.
// Corresponds to TfLiteGpuInferencePriority.
enum GPUInferencePriority {
  GPU_PRIORITY_AUTO = 0;
  GPU_PRIORITY_MAX_PRECISION = 1;
  GPU_PRIORITY_MIN_LATENCY = 2;
  GPU_PRIORITY_MIN_MEMORY_USAGE = 3;
}

// GPU inference preference for initialization time vs. inference time.
// Corresponds to TfLiteGpuInferenceUsage.
enum GPUInferenceUsage {
  // Delegate will be used only once, therefore, bootstrap/init time should
  // be taken into account.
  GPU_INFERENCE_PREFERENCE_FAST_SINGLE_ANSWER = 0;

  // Prefer maximizing the throughput. Same delegate will be used repeatedly on
  // multiple inputs.
  GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED = 1;
}

// GPU Delegate settings.
//
// See
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/gpu/delegate.h
message GPUSettings {
  // Ignored if inference_priority1/2/3 are set.
  optional bool is_precision_loss_allowed = 1;
  optional bool enable_quantized_inference = 2 [default = true];
  optional GPUBackend force_backend = 3;

  // Ordered priorities provide better control over desired semantics,
  // where priority(n) is more important than priority(n+1). Therefore,
  // each time the inference engine needs to make a decision, it uses
  // ordered priorities to do so.
  //
  // Default values correspond to GPU_PRIORITY_AUTO.
  // AUTO priority can only be used when higher priorities are fully specified.
  // For example:
  //   VALID:   priority1 = MIN_LATENCY, priority2 = AUTO, priority3 = AUTO
  //   VALID:   priority1 = MIN_LATENCY, priority2 = MAX_PRECISION,
  //            priority3 = AUTO
  //   INVALID: priority1 = AUTO, priority2 = MIN_LATENCY, priority3 = AUTO
  //   INVALID: priority1 = MIN_LATENCY, priority2 = AUTO,
  //            priority3 = MAX_PRECISION
  // Invalid priorities will result in an error.
  //
  // For more information, see TfLiteGpuDelegateOptionsV2.
  optional GPUInferencePriority inference_priority1 = 4
      [default = GPU_PRIORITY_AUTO];
  optional GPUInferencePriority inference_priority2 = 5
      [default = GPU_PRIORITY_AUTO];
  optional GPUInferencePriority inference_priority3 = 6
      [default = GPU_PRIORITY_AUTO];

  // Whether to optimize for compilation+execution time or execution time only.
  optional GPUInferenceUsage inference_preference = 7;

  // Model serialization. Setting both of these fields will also set the
  // TFLITE_GPU_EXPERIMENTAL_FLAGS_ENABLE_SERIALIZATION flag on the delegate.
  //
  // GPU model serialization directory passed in TfLiteGpuDelegateOptionsV2.
  // This should be set to the application's code cache directory so that it
  // cannot be accessed by other apps and is correctly deleted on app updates.
  optional string cache_directory = 8;
  // Normally, the model name with version number should be provided here,
  // since each model needs a unique ID to avoid cache collisions.
  optional string model_token = 9;
}
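
// For illustration, a textproto sketch of GPUSettings that uses the ordered
// priorities described above and enables serialization; the directory and
// token are hypothetical:
//
//   inference_priority1: GPU_PRIORITY_MIN_LATENCY
//   inference_priority2: GPU_PRIORITY_MAX_PRECISION
//   inference_priority3: GPU_PRIORITY_AUTO
//   inference_preference: GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED
//   cache_directory: "/data/data/com.example/code_cache"   # hypothetical
//   model_token: "my_model_v1"                              # hypothetical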

// Hexagon Delegate settings.
//
// See
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/hexagon/hexagon_delegate.h
message HexagonSettings {
  optional int32 debug_level = 1;
  optional int32 powersave_level = 2;
  optional bool print_graph_profile = 3;
  optional bool print_graph_debug = 4;
}

// XNNPack Delegate settings.
//
// See
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h
enum XNNPackFlags {
  // These flags match the flags in xnnpack_delegate.h.
  TFLITE_XNNPACK_DELEGATE_NO_FLAGS = 0;
  // Enable fast signed integer XNNPack kernels.
  TFLITE_XNNPACK_DELEGATE_FLAG_QS8 = 1;
  // Enable fast unsigned integer XNNPack kernels.
  TFLITE_XNNPACK_DELEGATE_FLAG_QU8 = 2;
  // Enable both signed and unsigned integer XNNPack kernels.
  TFLITE_XNNPACK_DELEGATE_FLAG_QS8_QU8 = 3;
  // Force 16-bit floating point inference.
  TFLITE_XNNPACK_DELEGATE_FLAG_FORCE_FP16 = 4;
}

message XNNPackSettings {
  optional int32 num_threads = 1;
  optional XNNPackFlags flags = 2 [default = TFLITE_XNNPACK_DELEGATE_NO_FLAGS];
}
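
// For illustration, a textproto sketch of XNNPackSettings that enables the
// signed quantized kernels on four threads:
//
//   num_threads: 4
//   flags: TFLITE_XNNPACK_DELEGATE_FLAG_QS8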

// CoreML Delegate settings.
//
// See
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/coreml/coreml_delegate.h
message CoreMLSettings {
  // Note the enum order change from the above header for better proto practice.
  enum EnabledDevices {
    // Always create Core ML delegate.
    DEVICES_ALL = 0;
    // Create Core ML delegate only on devices with Apple Neural Engine.
    DEVICES_WITH_NEURAL_ENGINE = 1;
  }
  // Which devices the Core ML delegate should be created for.
  optional EnabledDevices enabled_devices = 1;

  // Specifies the target Core ML version for model conversion.
  // Core ML 3 comes with a lot more ops, but some ops (e.g. reshape) are not
  // delegated due to input rank constraints.
  // If not set to one of the valid versions, the delegate will use the highest
  // version possible on the platform.
  // Valid versions: (2, 3)
  optional int32 coreml_version = 2;
  // This sets the maximum number of Core ML delegates created.
  // Each graph corresponds to one delegated node subset in the
  // TFLite model. Set this to 0 to delegate all possible partitions.
  optional int32 max_delegated_partitions = 3 [default = 0];
  // This sets the minimum number of nodes per partition delegated with
  // the Core ML delegate. Defaults to 2.
  optional int32 min_nodes_per_partition = 4 [default = 2];
}
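
// For illustration, a textproto sketch of CoreMLSettings that creates the
// delegate only on devices with a Neural Engine and targets Core ML 3:
//
//   enabled_devices: DEVICES_WITH_NEURAL_ENGINE
//   coreml_version: 3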

// EdgeTPU device spec.
//
message EdgeTpuDeviceSpec {
  // EdgeTPU platform types.
  enum PlatformType {
    MMIO = 0;
    REFERENCE = 1;
    SIMULATOR = 2;
    REMOTE_SIMULATOR = 3;
  }

  // Execution platform for the EdgeTPU device.
  optional PlatformType platform_type = 1;

  // Number of chips to use for the EdgeTPU device.
  optional int32 num_chips = 2;

  // Paths to the EdgeTPU devices.
  repeated string device_paths = 3;

  // Chip family used by the EdgeTpu device.
  optional int32 chip_family = 4;
}

// Generic definitions of EdgeTPU power states.
enum EdgeTpuPowerState {
  // Undefined power state.
  UNDEFINED_POWERSTATE = 0;

  // TPU core is off but control cluster is on.
  TPU_CORE_OFF = 1;

  // A non-active low-power state that has much smaller transition time to
  // active compared to off.
  READY = 2;

  // Minimum power active state.
  ACTIVE_MIN_POWER = 3;

  // Very low performance, very low power.
  ACTIVE_VERY_LOW_POWER = 4;

  // Low performance, low power.
  ACTIVE_LOW_POWER = 5;

  // The normal performance and power. This setting usually provides the
  // optimal perf/power trade-off for the average use-case.
  ACTIVE = 6;

  // Maximum performance level. Potentially higher power and thermal. This
  // setting may not be allowed in production depending on the system.
  OVER_DRIVE = 7;
}

message EdgeTpuInactivePowerConfig {
  // Inactive power states between inferences.
  optional EdgeTpuPowerState inactive_power_state = 1;

  // Inactive timeout in microseconds between inferences.
  optional int64 inactive_timeout_us = 2;
}

// EdgeTPU Delegate settings.
//
message EdgeTpuSettings {
  // Float truncation types for EdgeTPU.
  enum FloatTruncationType {
    UNSPECIFIED = 0;
    NO_TRUNCATION = 1;
    BFLOAT16 = 2;
    HALF = 3;
  }

  enum QosClass {
    QOS_UNDEFINED = 0;
    BEST_EFFORT = 1;
    REALTIME = 2;
  }

  // Target inference power state for running the model.
  optional EdgeTpuPowerState inference_power_state = 1;

  // Inactive power states between inferences.
  repeated EdgeTpuInactivePowerConfig inactive_power_configs = 2;

  // Priority for the inference request.
  optional int32 inference_priority = 3 [default = -1];

  // Device spec for creating the EdgeTpu device.
  optional EdgeTpuDeviceSpec edgetpu_device_spec = 4;

  // A unique identifier of the input TfLite model.
  optional string model_token = 5;

  // Float truncation type for EdgeTPU.
  optional FloatTruncationType float_truncation_type = 6;

  // QoS class to determine chunking size for PRO onward.
  optional QosClass qos_class = 7 [default = QOS_UNDEFINED];
}
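
// For illustration, a textproto sketch of EdgeTpuSettings that runs inference
// in the ACTIVE power state and drops to READY when idle; the timeout and
// device spec values are hypothetical:
//
//   inference_power_state: ACTIVE
//   inactive_power_configs {
//     inactive_power_state: READY
//     inactive_timeout_us: 1000000   # 1 second, hypothetical
//   }
//   edgetpu_device_spec { platform_type: MMIO num_chips: 1 }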

// Coral Dev Board / USB accelerator delegate settings.
//
// See
// https://github.com/google-coral/edgetpu/blob/master/libedgetpu/edgetpu_c.h
message CoralSettings {
  enum Performance {
    UNDEFINED = 0;
    MAXIMUM = 1;
    HIGH = 2;
    MEDIUM = 3;
    LOW = 4;
  }

  // The Edge Tpu device to be used. See
  // https://github.com/google-coral/libcoral/blob/982426546dfa10128376d0c24fd8a8b161daac97/coral/tflite_utils.h#L131-L137
  optional string device = 1;
  // The desired performance level. This setting adjusts the internal clock
  // rate to achieve different performance / power balance. Higher performance
  // values improve speed, but increase power usage.
  optional Performance performance = 2 [default = MAXIMUM];
  // If true, always perform device firmware update (DFU) after reset. DFU is
  // usually only necessary after power cycle.
  optional bool usb_always_dfu = 3;
  // The maximum bulk-in queue length. A larger queue length may improve USB
  // performance in the device-to-host direction. When not specified (or
  // zero), `usb_max_bulk_in_queue_length` will default to 32 according to the
  // current EdgeTpu Coral implementation.
  optional int32 usb_max_bulk_in_queue_length = 4;
}
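
// For illustration, a textproto sketch of CoralSettings that selects a USB
// device and trades peak performance for lower power; the device string is
// hypothetical:
//
//   device: "usb:0"   # hypothetical device selector
//   performance: MEDIUM
//   usb_max_bulk_in_queue_length: 32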

message CPUSettings {
  // Set to -1 to let the interpreter choose. Otherwise, must be > 0.
  optional int32 num_threads = 1 [default = -1];
}

// How to configure TFLite.
message TFLiteSettings {
  // Which delegate to use.
  optional Delegate delegate = 1;

  // How to configure the chosen delegate.
  // (In principle we would like to use 'oneof', but flatc turns that into a
  // nested anonymous table rather than a union. See
  // https://github.com/google/flatbuffers/issues/4628).
  optional NNAPISettings nnapi_settings = 2;
  optional GPUSettings gpu_settings = 3;
  optional HexagonSettings hexagon_settings = 4;
  optional XNNPackSettings xnnpack_settings = 5;
  optional CoreMLSettings coreml_settings = 11;

  // How to configure CPU execution.
  optional CPUSettings cpu_settings = 6;

  // Shared delegation settings.
  optional int32 max_delegated_partitions = 7;

  // For configuring the EdgeTpuDelegate.
  optional EdgeTpuSettings edgetpu_settings = 8;

  // For configuring the Coral EdgeTpu Delegate.
  optional CoralSettings coral_settings = 10;

  // Whether to automatically fall back to TFLite CPU path.
  optional FallbackSettings fallback_settings = 9;

  // Whether to disable default delegates (XNNPack).
  optional bool disable_default_delegates = 12;
}
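
// For illustration, a textproto sketch of a complete TFLiteSettings message
// that requests the GPU delegate with CPU fallback on compilation errors:
//
//   delegate: GPU
//   gpu_settings { enable_quantized_inference: true }
//   cpu_settings { num_threads: 4 }
//   fallback_settings { allow_automatic_fallback_on_compilation_error: true }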

// Whether to automatically fall back to the TFLite CPU path on delegation
// errors.
//
// Typically fallback is enabled in production use but disabled in tests and
// benchmarks to ensure they test the intended path.
message FallbackSettings {
  // Whether to allow automatically falling back to the TfLite CPU path on
  // compilation failure. Default is not allowing automatic fallback.
  //
  // This is useful in naive production use cases where the caller would prefer
  // for the model to run even if it's not accelerated. More advanced users
  // will implement fallback themselves; e.g., by using a different model on
  // CPU.
  //
  // Note that compilation errors may occur either at initial
  // ModifyGraphWithDelegate() time, or when calling AllocateTensors() after
  // resizing.
  optional bool allow_automatic_fallback_on_compilation_error = 7;
  // Whether to allow automatically falling back to the TfLite CPU path on
  // execution error. Default is not allowing automatic fallback.
  //
  // Experimental, use with care (only when you have complete control over the
  // client code).
  //
  // The caveat above for compilation errors holds. Additionally,
  // execution-time errors are harder to handle automatically as they require
  // invalidating the TfLite interpreter, which most client code has not been
  // designed to deal with.
  optional bool allow_automatic_fallback_on_execution_error = 8;
}

// On-device mini-benchmark result storage. The following definitions are used
// to keep an append-only log of benchmark results on-device. (Hence there is a
// single top-level event that is used for all data.)
//
// These definitions don't need a proto-to-flatbuffer conversion, since they are
// not used for specifying configuration in the Tasks library.

// Which stage of benchmarking the event is for.
// There might be multiple events with the same type, if a benchmark is run
// multiple times.
enum BenchmarkEventType {
  UNDEFINED_BENCHMARK_EVENT_TYPE = 0;
  // Benchmark start. A start without an end can be interpreted as a test that
  // has crashed or hung.
  START = 1;
  // Benchmarking completion. A model was successfully loaded, acceleration
  // configured and inference run without errors. There may still be an issue
  // with correctness of results, or with performance.
  END = 2;
  // Benchmark was not completed due to an error. The error may be a handled
  // error (e.g., failure in a delegate), or a crash.
  ERROR = 3;
  // Benchmark data has been sent for logging.
  LOGGED = 4;
  // Benchmark encountered an error but was able to continue. The error is not
  // related to the model execution but to the mini-benchmark logic. An example
  // of such an error is a failure when trying to set the CPU affinity of the
  // benchmark runner process.
  RECOVERED_ERROR = 5;
}

// A correctness metric from a benchmark, for example KL-divergence between
// known-good CPU output and on-device output. These are primarily used for
// telemetry and monitored server-side.
message BenchmarkMetric {
  optional string name = 1;
  repeated float values = 2 [packed = true];
}

// Outcome of a successfully completed benchmark run. This information is
// intended both to be used on-device to select the best compute configuration
// and to be sent to the server for monitoring.
//
// Used with event type END.
message BenchmarkResult {
  // Time to load the model and apply acceleration. Initialization may get run
  // multiple times to get information on variance.
  repeated int64 initialization_time_us = 1 [packed = true];
  // Time to run inference (call Invoke()). Inference may get run multiple
  // times to get information on variance.
  repeated int64 inference_time_us = 2 [packed = true];
  // Maximum memory used. Measures the size of the application heap (does not
  // necessarily take into account driver-side allocations).
  optional int32 max_memory_kb = 3;
  // Whether the inference produced correct results (validation graph output
  // 'ok' for all test inputs). Used on-device to disallow configurations that
  // produce incorrect results (e.g., due to OpenCL driver bugs).
  optional bool ok = 4;
  // Metrics that were used to determine the 'ok' status.
  repeated BenchmarkMetric metrics = 5;
}

// A handled error.
message ErrorCode {
  // Which delegate the error comes from (or NONE, if it comes from the tflite
  // framework).
  optional Delegate source = 1;
  // What the tflite level error is.
  optional int32 tflite_error = 2;
  // What the underlying error is (e.g., NNAPI or OpenGL error).
  optional int64 underlying_api_error = 3;
}

// When during benchmark execution an error occurred.
enum BenchmarkStage {
  UNKNOWN = 0;
  // During model loading or delegation.
  INITIALIZATION = 1;
  // During inference.
  INFERENCE = 2;
}

// An error that occurred during benchmarking.
//
// Used with event type ERROR.
message BenchmarkError {
  // How far benchmarking got.
  optional BenchmarkStage stage = 1;
  // Process exit code.
  optional int32 exit_code = 2;
  // Signal the process received.
  optional int32 signal = 3;
  // Handled tflite error.
  repeated ErrorCode error_code = 4;
  // Mini-benchmark error code.
  optional int32 mini_benchmark_error_code = 5;
}

// Top-level benchmarking event stored on-device. All events for a model are
// parsed to detect the status.
message BenchmarkEvent {
  // Which settings were used for benchmarking.
  optional TFLiteSettings tflite_settings = 1;
  // Type of the event.
  optional BenchmarkEventType event_type = 2;
  // Result of benchmark, used when type is END.
  optional BenchmarkResult result = 3;
  // Error during benchmark, used when type is ERROR.
  optional BenchmarkError error = 4;
  // Start timestamps. These are used for
  // 1. Checking whether a test was started but not completed within a given
  //    deadline.
  // 2. Optionally, telemetry timestamps.
  optional int64 boottime_us = 5;
  optional int64 wallclock_us = 6;
}

// Represents the decision on the best acceleration from the mini-benchmark.
message BestAccelerationDecision {
  // Number of events used to take the decision.
  // Using just the size instead of the full list of events to save space.
  optional int32 number_of_source_events = 1;

  // The event with minimum latency among the source events.
  optional BenchmarkEvent min_latency_event = 2;

  // Min latency as read from min_latency_event.
  optional int64 min_inference_time_us = 3;
}

// Represents a failure during the initialization of the mini-benchmark.
message BenchmarkInitializationFailure {
  // Status code returned by the mini-benchmark initialization function.
  optional int32 initialization_status = 1;
}

// Events generated by the mini-benchmark before and after triggering
// the different configuration-specific benchmarks.
message MiniBenchmarkEvent {
  // Not using oneof because of the way the C++ code is generated.
  // See the comment above on TFLiteSettings for details.

  // If set to true, this event is used to mark all previous events in the
  // mini-benchmark internal storage as read; otherwise, one of the other
  // fields in this message will have a value.
  optional bool is_log_flushing_event = 1;
  // Event generated when a best acceleration decision is taken.
  optional BestAccelerationDecision best_acceleration_decision = 2;
  // Reports a failure during mini-benchmark initialization.
  optional BenchmarkInitializationFailure initialization_failure = 3;
  // Event generated while benchmarking the different settings to test locally.
  optional BenchmarkEvent benchmark_event = 4;
}

// How to access the model for the mini-benchmark.
// Since the mini-benchmark runs in a separate process, it cannot access an
// in-memory model. It can read the model either from a file or from a file
// descriptor. The file descriptor typically comes from the Android asset
// manager.
//
// Users should set either filename, or all of fd, offset and length.
message ModelFile {
  // Filename to read the model from.
  optional string filename = 1;
  // File descriptor to read the model from.
  optional int64 fd = 2;
  // Offset of the model in the file descriptor.
  optional int64 offset = 3;
  // Length of the model in the file descriptor.
  optional int64 length = 4;
}
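
// For illustration, textproto sketches of the two supported access modes; the
// path and numbers are hypothetical:
//
//   # From a file:
//   filename: "/data/local/tmp/model.tflite"
//
//   # From a file descriptor (e.g. an Android asset):
//   fd: 42 offset: 4096 length: 524288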

// Where to store mini-benchmark state.
message BenchmarkStoragePaths {
  // Base path to the files used to store benchmark results in. Two files
  // will be generated: one with the given path and an extra file to store
  // events related to best acceleration results at path storage_file_path +
  // ".extra.fb". Must be specific to the model.
  // Note: on Android, this should be the code cache directory.
  optional string storage_file_path = 1;

  // Path to a directory for intermediate files (lock files, extracted
  // binaries).
  // Note: on Android, this is typically the data cache directory (i.e. the one
  // returned by `getCacheDir()`).
  optional string data_directory_path = 2;
}

// How to run a mini-benchmark.
message MinibenchmarkSettings {
  // Which settings to test. This would typically be filled in from an
  // allowlist.
  repeated TFLiteSettings settings_to_test = 1;
  // How to access the model. This would typically be set dynamically, as it
  // depends on the application folder and/or runtime state.
  optional ModelFile model_file = 2;
  // Where to store state. This would typically be set dynamically, as it
  // depends on the application folder.
  optional BenchmarkStoragePaths storage_paths = 3;
}
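
// For illustration, an end-to-end textproto sketch of MinibenchmarkSettings
// that tests plain CPU execution against GPU acceleration; all paths are
// hypothetical:
//
//   settings_to_test { delegate: NONE }
//   settings_to_test { delegate: GPU }
//   model_file { filename: "/data/local/tmp/model.tflite" }
//   storage_paths {
//     storage_file_path: "/data/data/com.example/code_cache/mb_events.fb"
//     data_directory_path: "/data/data/com.example/cache"
//   }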