// Copyright 2020 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// WARNING: Until b/191428000 is fixed you need to manually generate and update
// the generated flatbuffer code when modifying this file. See BUILD for more
// information.

// This schema defines how to configure TFLite for delegation. These
// definitions can be used in multiple ways: as output of a compatibility list,
// in benchmarking tools, and to decouple delegate instantiation from code.
//
// The schema is work-in-progress, covering the most broadly used delegates and
// options.

syntax = "proto2";

package tflite.proto;

// ExecutionPreference is used to match accelerators against the preferences of
// the current application or use case. Some of the values here can appear both
// in the compatibility list and as input, some only as input.
//
// These are separate from NNAPIExecutionPreference - the compatibility list
// design doesn't assume a one-to-one mapping between which use cases
// compatibility list entries have been developed for and what settings are
// used for NNAPI.
enum ExecutionPreference {
  // Match any selected preference. Allowlist (semantically - value is same as
  // on input).
  ANY = 0;
  // Match low latency preference. Both compatibility list and input.
  LOW_LATENCY = 1;
  // Match low power preference. Both compatibility list and input.
  LOW_POWER = 2;
  // Never accelerate. Can be used for input to compatibility list or for
  // standalone Acceleration configuration.
  FORCE_CPU = 3;
}

// TFLite accelerator to use.
enum Delegate {
  NONE = 0;

  NNAPI = 1;
  GPU = 2;
  HEXAGON = 3;
  XNNPACK = 4;
  // The EdgeTpu in Pixel devices.
  EDGETPU = 5;
  // The Coral EdgeTpu Dev Board / USB accelerator.
  EDGETPU_CORAL = 6;
  // Apple Core ML.
  CORE_ML = 7;
}

enum NNAPIExecutionPreference {
  // Undefined.
  UNDEFINED = 0;
  // Prefer executing in a way that minimizes battery drain.
  NNAPI_LOW_POWER = 1;
  // Prefer returning a single answer as fast as possible, even if this causes
  // more power consumption.
  NNAPI_FAST_SINGLE_ANSWER = 2;
  // Prefer maximizing the throughput of successive frames, for example when
  // processing successive frames coming from the camera.
  NNAPI_SUSTAINED_SPEED = 3;
}

enum NNAPIExecutionPriority {
  NNAPI_PRIORITY_UNDEFINED = 0;
  NNAPI_PRIORITY_LOW = 1;
  NNAPI_PRIORITY_MEDIUM = 2;
  NNAPI_PRIORITY_HIGH = 3;
}

// One possible acceleration configuration.
message ComputeSettings {
  // Which preference to use this accelerator for.
  optional ExecutionPreference preference = 1;
  // How to configure TFLite.
  optional TFLiteSettings tflite_settings = 2;
  // Identifiers to use for instrumentation and telemetry.
  optional string model_namespace_for_statistics = 3;
  optional string model_identifier_for_statistics = 4;

  // 'Maybe' acceleration: use mini-benchmark to select settings.
  optional MinibenchmarkSettings settings_to_test_locally = 5;
}
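// Illustrative example (not part of the schema): a ComputeSettings message
// could be written in proto text format as follows. The identifier strings
// are hypothetical placeholders.
//
//   preference: LOW_LATENCY
//   tflite_settings { delegate: GPU }
//   model_namespace_for_statistics: "image_classification"  # hypothetical
//   model_identifier_for_statistics: "my_model_v1"          # hypothetical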
// NNAPI delegate settings.
message NNAPISettings {
  // Which instance (NNAPI accelerator) to use. One driver may provide several
  // accelerators (though a driver may also hide several back-ends behind one
  // name, at the choice of the driver vendor).
  // Note that driver introspection is only available in Android Q and later.
  optional string accelerator_name = 1;

  // NNAPI model compilation caching settings to be passed to
  // tflite::StatefulNnApiDelegate.
  optional string cache_directory = 2;
  optional string model_token = 3;

  // NNAPI execution preference to pass. See
  // https://developer.android.com/ndk/reference/group/neural-networks.html
  optional NNAPIExecutionPreference execution_preference = 4;

  // Number of instances to cache for the same model (for input size
  // changes). This is mandatory for getting reasonable performance in that
  // case.
  optional int32 no_of_nnapi_instances_to_cache = 5;

  // Deprecated; use the fallback_settings in TFLiteSettings.
  //
  // Whether to automatically fall back to the TFLite CPU path.
  optional FallbackSettings fallback_settings = 6 [deprecated = true];

  // Whether to allow use of the NNAPI CPU (the nnapi-reference accelerator) on
  // Android 10+ when an accelerator name is not specified. The NNAPI CPU
  // typically performs less well than the TFLite built-in kernels, but
  // allowing it lets a model be partially accelerated, which may be a win.
  optional bool allow_nnapi_cpu_on_android_10_plus = 7;

  optional NNAPIExecutionPriority execution_priority = 8;

  // Whether to allow dynamic dimension sizes without re-compilation.
  // A tensor with dynamic dimensions must have a valid dims_signature
  // defined.
  // Only supported in NNAPI 1.1 and newer versions.
  // WARNING: Setting this flag to true may result in the model being rejected
  // by the accelerator. This should only be enabled if the target device
  // supports dynamic dimensions of the model.
  // By default this is set to false.
  optional bool allow_dynamic_dimensions = 9;

  // Whether to allow the NNAPI accelerator to optionally use lower-precision
  // float16 (16-bit floating point) arithmetic when doing calculations on
  // float32 (32-bit floating point).
  optional bool allow_fp16_precision_for_fp32 = 10;

  // Whether to use NNAPI Burst mode.
  // Burst mode allows accelerators to efficiently manage resources, which
  // can significantly reduce overhead, especially if the same delegate
  // instance is used for multiple inferences.
  optional bool use_burst_computation = 11;

  // Optional pointer to an NnApiSLDriverImplFL5 instance provided by the
  // NNAPI Support Library, which can be used to construct the NNAPI delegate.
  optional int64 support_library_handle = 12;
}
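// Illustrative NNAPISettings sketch in proto text format (all string values
// are hypothetical; the accelerator name depends on the device's drivers):
//
//   accelerator_name: "example-accelerator"                # hypothetical
//   cache_directory: "/data/user/0/com.example.app/cache"  # hypothetical
//   model_token: "my_model_v1"                             # hypothetical
//   execution_preference: NNAPI_SUSTAINED_SPEED
//   use_burst_computation: true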
// Which GPU backend to select. Default behaviour on Android is to try OpenCL
// and, if it's not available, fall back to OpenGL.
enum GPUBackend {
  UNSET = 0;
  OPENCL = 1;
  OPENGL = 2;
  // Not yet supported.
  // VULKAN = 3;
  // METAL = 4;
}

// GPU inference priorities define relative priorities given by the GPU
// delegate to different client needs.
// Corresponds to TfLiteGpuInferencePriority.
enum GPUInferencePriority {
  GPU_PRIORITY_AUTO = 0;
  GPU_PRIORITY_MAX_PRECISION = 1;
  GPU_PRIORITY_MIN_LATENCY = 2;
  GPU_PRIORITY_MIN_MEMORY_USAGE = 3;
}

// GPU inference preference for initialization time vs. inference time.
// Corresponds to TfLiteGpuInferenceUsage.
enum GPUInferenceUsage {
  // Delegate will be used only once; therefore, bootstrap/init time should
  // be taken into account.
  GPU_INFERENCE_PREFERENCE_FAST_SINGLE_ANSWER = 0;

  // Prefer maximizing the throughput. The same delegate will be used
  // repeatedly on multiple inputs.
  GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED = 1;
}

// GPU delegate settings.
//
// See
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/gpu/delegate.h
message GPUSettings {
  // Ignored if inference_priority1/2/3 are set.
  optional bool is_precision_loss_allowed = 1;
  optional bool enable_quantized_inference = 2 [default = true];
  optional GPUBackend force_backend = 3;

  // Ordered priorities provide better control over the desired semantics,
  // where priority(n) is more important than priority(n+1). Therefore, each
  // time the inference engine needs to make a decision, it uses the ordered
  // priorities to do so.
  //
  // Default values correspond to GPU_PRIORITY_AUTO.
  // The AUTO priority can only be used when the higher priorities are fully
  // specified.
  // For example:
  //   VALID:   priority1 = MIN_LATENCY, priority2 = AUTO, priority3 = AUTO
  //   VALID:   priority1 = MIN_LATENCY, priority2 = MAX_PRECISION,
  //            priority3 = AUTO
  //   INVALID: priority1 = AUTO, priority2 = MIN_LATENCY, priority3 = AUTO
  //   INVALID: priority1 = MIN_LATENCY, priority2 = AUTO,
  //            priority3 = MAX_PRECISION
  // Invalid priorities will result in an error.
  //
  // For more information, see TfLiteGpuDelegateOptionsV2.
  optional GPUInferencePriority inference_priority1 = 4
      [default = GPU_PRIORITY_AUTO];
  optional GPUInferencePriority inference_priority2 = 5
      [default = GPU_PRIORITY_AUTO];
  optional GPUInferencePriority inference_priority3 = 6
      [default = GPU_PRIORITY_AUTO];

  // Whether to optimize for compilation+execution time or execution time only.
  optional GPUInferenceUsage inference_preference = 7;

  // Model serialization. Setting both of these fields will also set the
  // TFLITE_GPU_EXPERIMENTAL_FLAGS_ENABLE_SERIALIZATION flag on the delegate.
  //
  // GPU model serialization directory passed in TfLiteGpuDelegateOptionsV2.
  // This should be set to the application's code cache directory so that it
  // cannot be accessed by other apps and is correctly deleted on app updates.
  optional string cache_directory = 8;
  // Normally, the model name with its version number should be provided here,
  // since each model needs a unique ID to avoid cache collisions.
  optional string model_token = 9;
}
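// Illustrative GPUSettings sketch in proto text format, showing one of the
// valid priority orderings described above (the path and token are
// hypothetical):
//
//   inference_priority1: GPU_PRIORITY_MIN_LATENCY
//   inference_priority2: GPU_PRIORITY_MAX_PRECISION
//   inference_priority3: GPU_PRIORITY_AUTO
//   inference_preference: GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED
//   cache_directory: "/data/user/0/com.example.app/code_cache"  # hypothetical
//   model_token: "my_model_v1"                                  # hypothetical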
// Hexagon delegate settings.
//
// See
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/hexagon/hexagon_delegate.h
message HexagonSettings {
  optional int32 debug_level = 1;
  optional int32 powersave_level = 2;
  optional bool print_graph_profile = 3;
  optional bool print_graph_debug = 4;
}

// XNNPACK delegate settings.
//
// See
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h
enum XNNPackFlags {
  // These flags match the flags in xnnpack_delegate.h.
  TFLITE_XNNPACK_DELEGATE_NO_FLAGS = 0;
  // Enable fast signed integer XNNPACK kernels.
  TFLITE_XNNPACK_DELEGATE_FLAG_QS8 = 1;
  // Enable fast unsigned integer XNNPACK kernels.
  TFLITE_XNNPACK_DELEGATE_FLAG_QU8 = 2;
  // Enable both signed and unsigned integer XNNPACK kernels.
  TFLITE_XNNPACK_DELEGATE_FLAG_QS8_QU8 = 3;
  // Force 16-bit floating point inference.
  TFLITE_XNNPACK_DELEGATE_FLAG_FORCE_FP16 = 4;
}

message XNNPackSettings {
  optional int32 num_threads = 1;
  optional XNNPackFlags flags = 2 [default = TFLITE_XNNPACK_DELEGATE_NO_FLAGS];
}

// Core ML delegate settings.
//
// See
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/coreml/coreml_delegate.h
message CoreMLSettings {
  // Note: the enum order differs from the header above, for better proto
  // practice.
  enum EnabledDevices {
    // Always create the Core ML delegate.
    DEVICES_ALL = 0;
    // Create the Core ML delegate only on devices with an Apple Neural
    // Engine.
    DEVICES_WITH_NEURAL_ENGINE = 1;
  }
  // Which devices to create the Core ML delegate on.
  optional EnabledDevices enabled_devices = 1;

  // Specifies the target Core ML version for model conversion.
  // Core ML 3 comes with many more ops, but some ops (e.g., reshape) are not
  // delegated due to input rank constraints.
  // If not set to one of the valid versions, the delegate will use the
  // highest version possible on the platform.
  // Valid versions: (2, 3)
  optional int32 coreml_version = 2;
  // This sets the maximum number of Core ML delegates created.
  // Each graph corresponds to one delegated node subset in the
  // TFLite model. Set this to 0 to delegate all possible partitions.
  optional int32 max_delegated_partitions = 3 [default = 0];
  // This sets the minimum number of nodes per partition delegated with
  // the Core ML delegate. Defaults to 2.
  optional int32 min_nodes_per_partition = 4 [default = 2];
}
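// Illustrative CoreMLSettings sketch in proto text format (values are example
// choices, not recommendations):
//
//   enabled_devices: DEVICES_WITH_NEURAL_ENGINE
//   coreml_version: 3
//   max_delegated_partitions: 0  # delegate all possible partitions
//   min_nodes_per_partition: 2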
// EdgeTPU device spec.
message EdgeTpuDeviceSpec {
  // EdgeTPU platform types.
  enum PlatformType {
    MMIO = 0;
    REFERENCE = 1;
    SIMULATOR = 2;
    REMOTE_SIMULATOR = 3;
  }

  // Execution platform for the EdgeTPU device.
  optional PlatformType platform_type = 1;

  // Number of chips to use for the EdgeTPU device.
  optional int32 num_chips = 2;

  // Paths to the EdgeTPU devices.
  repeated string device_paths = 3;

  // Chip family used by the EdgeTpu device.
  optional int32 chip_family = 4;
}

// Generic definitions of EdgeTPU power states.
enum EdgeTpuPowerState {
  // Undefined power state.
  UNDEFINED_POWERSTATE = 0;

  // TPU core is off but the control cluster is on.
  TPU_CORE_OFF = 1;

  // A non-active low-power state that has a much smaller transition time to
  // active compared to off.
  READY = 2;

  // Minimum power active state.
  ACTIVE_MIN_POWER = 3;

  // Very low performance, very low power.
  ACTIVE_VERY_LOW_POWER = 4;

  // Low performance, low power.
  ACTIVE_LOW_POWER = 5;

  // Normal performance and power. This setting usually provides the
  // optimal perf/power trade-off for the average use case.
  ACTIVE = 6;

  // Maximum performance level. Potentially higher power and thermal output.
  // This setting may not be allowed in production depending on the system.
  OVER_DRIVE = 7;
}

message EdgeTpuInactivePowerConfig {
  // Inactive power state between inferences.
  optional EdgeTpuPowerState inactive_power_state = 1;

  // Inactive timeout in microseconds between inferences.
  optional int64 inactive_timeout_us = 2;
}

// EdgeTPU delegate settings.
message EdgeTpuSettings {
  // Float truncation types for EdgeTPU.
  enum FloatTruncationType {
    UNSPECIFIED = 0;
    NO_TRUNCATION = 1;
    BFLOAT16 = 2;
    HALF = 3;
  }

  enum QosClass {
    QOS_UNDEFINED = 0;
    BEST_EFFORT = 1;
    REALTIME = 2;
  }

  // Target inference power state for running the model.
  optional EdgeTpuPowerState inference_power_state = 1;

  // Inactive power states between inferences.
  repeated EdgeTpuInactivePowerConfig inactive_power_configs = 2;

  // Priority for the inference request.
  optional int32 inference_priority = 3 [default = -1];

  // Device spec for creating the EdgeTpu device.
  optional EdgeTpuDeviceSpec edgetpu_device_spec = 4;

  // A unique identifier of the input TfLite model.
  optional string model_token = 5;

  // Float truncation type for EdgeTPU.
  optional FloatTruncationType float_truncation_type = 6;

  // QoS class to determine chunking size for PRO onward.
  optional QosClass qos_class = 7 [default = QOS_UNDEFINED];
}

// Coral Dev Board / USB accelerator delegate settings.
//
// See
// https://github.com/google-coral/edgetpu/blob/master/libedgetpu/edgetpu_c.h
message CoralSettings {
  enum Performance {
    UNDEFINED = 0;
    MAXIMUM = 1;
    HIGH = 2;
    MEDIUM = 3;
    LOW = 4;
  }

  // The EdgeTpu device to be used. See
  // https://github.com/google-coral/libcoral/blob/982426546dfa10128376d0c24fd8a8b161daac97/coral/tflite_utils.h#L131-L137
  optional string device = 1;
  // The desired performance level. This setting adjusts the internal clock
  // rate to achieve different performance / power balances. Higher
  // performance values improve speed but increase power usage.
  optional Performance performance = 2 [default = MAXIMUM];
  // If true, always perform a device firmware update (DFU) after reset. DFU
  // is usually only necessary after a power cycle.
  optional bool usb_always_dfu = 3;
  // The maximum bulk-in queue length. A larger queue length may improve USB
  // performance in the device-to-host direction. When not specified (or
  // zero), `usb_max_bulk_in_queue_length` will default to 32 according to the
  // current EdgeTpu Coral implementation.
  optional int32 usb_max_bulk_in_queue_length = 4;
}

message CPUSettings {
  // Set to -1 to let the interpreter choose. Otherwise, must be > 0.
  optional int32 num_threads = 1 [default = -1];
}
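// Illustrative EdgeTpuSettings sketch in proto text format (the power states
// and timeout are example choices; the token is hypothetical):
//
//   inference_power_state: ACTIVE
//   inactive_power_configs {
//     inactive_power_state: READY
//     inactive_timeout_us: 1000000  # 1 second
//   }
//   edgetpu_device_spec { platform_type: MMIO num_chips: 1 }
//   model_token: "my_model_v1"  # hypothetical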
// How to configure TFLite.
message TFLiteSettings {
  // Which delegate to use.
  optional Delegate delegate = 1;

  // How to configure the chosen delegate.
  // (In principle we would like to use 'oneof', but flatc turns that into a
  // nested anonymous table rather than a union. See
  // https://github.com/google/flatbuffers/issues/4628).
  optional NNAPISettings nnapi_settings = 2;
  optional GPUSettings gpu_settings = 3;
  optional HexagonSettings hexagon_settings = 4;
  optional XNNPackSettings xnnpack_settings = 5;
  optional CoreMLSettings coreml_settings = 11;

  // How to configure CPU execution.
  optional CPUSettings cpu_settings = 6;

  // Shared delegation settings.
  optional int32 max_delegated_partitions = 7;

  // For configuring the EdgeTpuDelegate.
  optional EdgeTpuSettings edgetpu_settings = 8;

  // For configuring the Coral EdgeTpu delegate.
  optional CoralSettings coral_settings = 10;

  // Whether to automatically fall back to the TFLite CPU path.
  optional FallbackSettings fallback_settings = 9;

  // Whether to disable the default delegates (XNNPACK).
  optional bool disable_default_delegates = 12;
}

// Whether to automatically fall back to the TFLite CPU path on delegation
// errors.
//
// Typically fallback is enabled in production use but disabled in tests and
// benchmarks to ensure they test the intended path.
message FallbackSettings {
  // Whether to allow automatically falling back to the TFLite CPU path on
  // compilation failure. Default is not allowing automatic fallback.
  //
  // This is useful in naive production use cases where the caller would
  // prefer for the model to run even if it's not accelerated. More advanced
  // users will implement fallback themselves, e.g., by using a different
  // model on CPU.
  //
  // Note that compilation errors may occur either at initial
  // ModifyGraphWithDelegate() time, or when calling AllocateTensors() after
  // resizing.
  optional bool allow_automatic_fallback_on_compilation_error = 7;
  // Whether to allow automatically falling back to the TFLite CPU path on
  // execution error. Default is not allowing automatic fallback.
  //
  // Experimental, use with care (only when you have complete control over the
  // client code).
  //
  // The caveat above for compilation errors holds. Additionally,
  // execution-time errors are harder to handle automatically, as they require
  // invalidating the TFLite interpreter, which most client code has not been
  // designed to deal with.
  optional bool allow_automatic_fallback_on_execution_error = 8;
}
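// Illustrative TFLiteSettings sketch in proto text format, combining a
// delegate choice, delegate-specific options, and fallback behaviour (values
// are example choices):
//
//   delegate: GPU
//   gpu_settings {
//     inference_preference: GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED
//   }
//   cpu_settings { num_threads: 4 }
//   fallback_settings {
//     allow_automatic_fallback_on_compilation_error: true
//   }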
// On-device mini-benchmark result storage. The following definitions are used
// to keep an append-only log of benchmark results on-device. (Hence there is
// a single top-level event that is used for all data.)
//
// These definitions don't need a proto-to-flatbuffer conversion, since they
// are not used for specifying configuration in the Tasks library.

// Which stage of benchmarking the event is for.
// There might be multiple events with the same type, if a benchmark is run
// multiple times.
enum BenchmarkEventType {
  UNDEFINED_BENCHMARK_EVENT_TYPE = 0;
  // Benchmark start. A start without an end can be interpreted as a test that
  // has crashed or hung.
  START = 1;
  // Benchmarking completion. A model was successfully loaded, acceleration
  // configured and inference run without errors. There may still be an issue
  // with correctness of results, or with performance.
  END = 2;
  // Benchmark was not completed due to an error. The error may be a handled
  // error (e.g., failure in a delegate), or a crash.
  ERROR = 3;
  // Benchmark data has been sent for logging.
  LOGGED = 4;
  // Benchmark encountered an error but was able to continue. The error is not
  // related to the model execution but to the mini-benchmark logic. An
  // example of such an error is a failure when trying to set the CPU affinity
  // of the benchmark runner process.
  RECOVERED_ERROR = 5;
}

// A correctness metric from a benchmark, for example KL-divergence between
// known-good CPU output and on-device output. These are primarily used for
// telemetry and monitored server-side.
message BenchmarkMetric {
  optional string name = 1;
  repeated float values = 2 [packed = true];
}

// Outcome of a successfully completed benchmark run. This information is
// intended both to be used on-device, to select the best compute
// configuration, and to be sent to a server for monitoring.
//
// Used with event type END.
message BenchmarkResult {
  // Time to load the model and apply acceleration. Initialization may be run
  // multiple times to get information on variance.
  repeated int64 initialization_time_us = 1 [packed = true];
  // Time to run inference (call Invoke()). Inference may be run multiple
  // times to get information on variance.
  repeated int64 inference_time_us = 2 [packed = true];
  // Maximum memory used. Measures the size of the application heap (does not
  // necessarily take into account driver-side allocations).
  optional int32 max_memory_kb = 3;
  // Whether the inference produced correct results (validation graph output
  // 'ok' for all test inputs). Used on-device to disallow configurations that
  // produce incorrect results (e.g., due to OpenCL driver bugs).
  optional bool ok = 4;
  // Metrics that were used to determine the 'ok' status.
  repeated BenchmarkMetric metrics = 5;
}

// A handled error.
message ErrorCode {
  // Which delegate the error comes from (or NONE, if it comes from the TFLite
  // framework).
  optional Delegate source = 1;
  // What the TFLite-level error is.
  optional int32 tflite_error = 2;
  // What the underlying error is (e.g., NNAPI or OpenGL error).
  optional int64 underlying_api_error = 3;
}

// When during benchmark execution an error occurred.
enum BenchmarkStage {
  UNKNOWN = 0;
  // During model loading or delegation.
  INITIALIZATION = 1;
  // During inference.
  INFERENCE = 2;
}

// An error that occurred during benchmarking.
//
// Used with event type ERROR.
message BenchmarkError {
  // How far benchmarking got.
  optional BenchmarkStage stage = 1;
  // Process exit code.
  optional int32 exit_code = 2;
  // Signal the process received.
  optional int32 signal = 3;
  // Handled TFLite error.
  repeated ErrorCode error_code = 4;
  // Mini-benchmark error code.
  optional int32 mini_benchmark_error_code = 5;
}
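// Illustrative BenchmarkResult sketch in proto text format (all numbers are
// made-up example values; the metric name is hypothetical):
//
//   initialization_time_us: 152000
//   initialization_time_us: 149000
//   inference_time_us: 9100
//   inference_time_us: 8900
//   max_memory_kb: 4096
//   ok: true
//   metrics { name: "mse" values: 0.0001 }  # hypothetical metric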
// Top-level benchmarking event stored on-device. All events for a model are
// parsed to detect the status.
message BenchmarkEvent {
  // Which settings were used for benchmarking.
  optional TFLiteSettings tflite_settings = 1;
  // Type of the event.
  optional BenchmarkEventType event_type = 2;
  // Result of the benchmark, used when the type is END.
  optional BenchmarkResult result = 3;
  // Error during the benchmark, used when the type is ERROR.
  optional BenchmarkError error = 4;
  // Start timestamps. These are used for
  // 1. Checking whether a test was started but not completed within a given
  //    deadline.
  // 2. Optionally, telemetry timestamps.
  optional int64 boottime_us = 5;
  optional int64 wallclock_us = 6;
}

// Represents the decision on the best acceleration from the mini-benchmark.
message BestAccelerationDecision {
  // Number of events used to make the decision.
  // Uses just the count instead of the full list of events, to save space.
  optional int32 number_of_source_events = 1;

  // The event with the minimum latency among the source events.
  optional BenchmarkEvent min_latency_event = 2;

  // Min latency as read from min_latency_event.
  optional int64 min_inference_time_us = 3;
}

// Represents a failure during the initialization of the mini-benchmark.
message BenchmarkInitializationFailure {
  // Status code returned by the mini-benchmark initialization function.
  optional int32 initialization_status = 1;
}

// Events generated by the mini-benchmark before and after triggering the
// different configuration-specific benchmarks.
message MiniBenchmarkEvent {
  // Not using oneof because of the way the C++ code is generated.
  // See the comment above on TFLiteSettings for details.

  // If set to true, this event is used to mark all previous events in the
  // mini-benchmark internal storage as read; otherwise, one of the other
  // fields in this message will have a value.
  optional bool is_log_flushing_event = 1;
  // Event generated when a best acceleration decision is taken.
  optional BestAccelerationDecision best_acceleration_decision = 2;
  // Reports a failure during mini-benchmark initialization.
  optional BenchmarkInitializationFailure initialization_failure = 3;
  // Event generated while benchmarking the different settings to test
  // locally.
  optional BenchmarkEvent benchmark_event = 4;
}

// How to access the model for the mini-benchmark.
// Since the mini-benchmark runs in a separate process, it cannot access an
// in-memory model. It can read the model either from a file or from a file
// descriptor. The file descriptor typically comes from the Android asset
// manager.
//
// Users should set either filename, or all of fd, offset and length.
message ModelFile {
  // Filename to read the model from.
  optional string filename = 1;
  // File descriptor to read the model from.
  optional int64 fd = 2;
  // Offset of the model in the file descriptor.
  optional int64 offset = 3;
  // Length of the model in the file descriptor.
  optional int64 length = 4;
}

// Where to store mini-benchmark state.
message BenchmarkStoragePaths {
  // Base path to the files used to store benchmark results. Two files will
  // be generated: one at the given path, and an extra file to store events
  // related to best acceleration results at the path storage_file_path +
  // ".extra.fb". Must be specific to the model.
  // Note: on Android, this should be in the code cache directory.
  optional string storage_file_path = 1;

  // Path to a directory for intermediate files (lock files, extracted
  // binaries).
  // Note: on Android, this typically is the data cache directory (i.e. the
  // one returned by `getCacheDir()`).
  optional string data_directory_path = 2;
}
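// Illustrative ModelFile sketches in proto text format (paths and values are
// hypothetical). Either a filename or a (fd, offset, length) triple is set,
// not both:
//
//   model_file { filename: "/data/local/tmp/model.tflite" }
// or
//   model_file { fd: 42 offset: 0 length: 1048576 }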
// How to run the mini-benchmark.
message MinibenchmarkSettings {
  // Which settings to test. This would typically be filled in from an
  // allowlist.
  repeated TFLiteSettings settings_to_test = 1;
  // How to access the model. This would typically be set dynamically, as it
  // depends on the application folder and/or runtime state.
  optional ModelFile model_file = 2;
  // Where to store state. This would typically be set dynamically, as it
  // depends on the application folder.
  optional BenchmarkStoragePaths storage_paths = 3;
}
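// Illustrative MinibenchmarkSettings sketch in proto text format, testing GPU
// and XNNPACK against each other (paths are hypothetical):
//
//   settings_to_test { delegate: GPU }
//   settings_to_test { delegate: XNNPACK xnnpack_settings { num_threads: 4 } }
//   model_file { filename: "/data/local/tmp/model.tflite" }
//   storage_paths {
//     storage_file_path: "/data/user/0/com.example.app/code_cache/mb.fb"
//     data_directory_path: "/data/user/0/com.example.app/cache"
//   }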