/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_KERNEL_H_
#define TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_KERNEL_H_

#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <vector>

#include "tensorflow/lite/allocation.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/nnapi/nnapi_implementation.h"

namespace tflite {
namespace delegate {
namespace nnapi {

constexpr int32_t kMinSdkVersionForNNAPI = 27;
constexpr int32_t kMinSdkVersionForNNAPI11 = 28;
constexpr int32_t kMinSdkVersionForNNAPI12 = 29;
constexpr int32_t kMinSdkVersionForNNAPI13 = 30;
// TODO(b/185838597): change the remaining kMinSdkVersionForNNAPI* to
// kNNAPIRuntimeFeatureLevel*.
constexpr int32_t kNNAPIRuntimeFeatureLevel5 = 31;

// Tracks the mapping from TFLite tensor indices to NN API tensor indices.
class OperandMapping {
 public:
  // Given a TFLite index, returns the ANN index. Returns -1 if no mapping
  // exists.
  int lite_index_to_ann(int index) const {
    const int64_t max_size = lite_tensor_to_ann_tensor_.size();
    if (index >= 0 && index < max_size)
      return lite_tensor_to_ann_tensor_[index];
    else
      return -1;
  }

  // NN API uses non-tensor operands instead of structs. This creates one
  // and returns the index. It uses a std::vector and resizes it as needed,
  // keeping -1 for unmapped values. Intermediate tensors likely will not
  // be mapped.
  int add_new_non_tensor_operand() { return next_ann_tensor_index_++; }

  // This call is necessary for input operands generated by the delegate
  // to map constant inputs not present in TFLite but required by NNAPI,
  // for example when splitting one input into several ones.
  int add_delegate_generated_input_ann_tensors_operand() {
    return next_ann_tensor_index_++;
  }

  // Adds a new mapping from `tflite_index` and returns the NN API tensor
  // index.
  int add_new_ann_tensor_index(int tflite_index) {
    const int64_t current_size = lite_tensor_to_ann_tensor_.size();
    if (tflite_index >= current_size) {
      lite_tensor_to_ann_tensor_.resize(tflite_index + 1, -1);
    }
    const int new_tensor_index = next_ann_tensor_index_++;
    lite_tensor_to_ann_tensor_[tflite_index] = new_tensor_index;
    return new_tensor_index;
  }

  // Given a TFLite index, returns the TFLite type to which the tensor must be
  // converted while copying its data to the memory allocated for NN API.
  // kTfLiteNoType means no conversion is needed.
  TfLiteType lite_index_to_ann_type_conversion(int index) const {
    const int64_t max_size = index_to_type_conversion_.size();
    if (index >= 0 && index < max_size)
      return index_to_type_conversion_[index];
    else
      return kTfLiteNoType;
  }

  // Adds a new mapping from a TFLite index to a type conversion.
  void add_type_conversion(int tflite_index, TfLiteType tflite_type) {
    const int64_t current_size = index_to_type_conversion_.size();
    if (tflite_index >= current_size) {
      index_to_type_conversion_.resize(tflite_index + 1, kTfLiteNoType);
    }
    index_to_type_conversion_[tflite_index] = tflite_type;
  }

 private:
  // Next index of ann tensor.
  int next_ann_tensor_index_ = 0;

  // Mapping from lite index. Uses a std::vector for speed and code size
  // rather than a map.
  std::vector<int> lite_tensor_to_ann_tensor_;
  // Mapping from lite index to the type the tensor must be converted to while
  // copying its data to the memory allocated for NN API. kTfLiteNoType
  // means no conversion is needed. Uses a std::vector for speed and code size
  // rather than a map.
  std::vector<TfLiteType> index_to_type_conversion_;
};
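// A minimal usage sketch of OperandMapping (illustrative comment only, not
// part of the delegate implementation; the tensor indices below are
// hypothetical):
//
//   OperandMapping mapping;
//   // Map TFLite tensor 3 to the next free ANN operand index (0 here).
//   int ann_index = mapping.add_new_ann_tensor_index(/*tflite_index=*/3);
//   // Record that tensor 3 must be converted to float32 when its data is
//   // copied into the NNAPI-owned memory.
//   mapping.add_type_conversion(/*tflite_index=*/3, kTfLiteFloat32);
//   // Lookups; unmapped indices return -1 / kTfLiteNoType.
//   ann_index = mapping.lite_index_to_ann(3);                        // 0
//   TfLiteType conv = mapping.lite_index_to_ann_type_conversion(3);  // kTfLiteFloat32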
class NNAPIOpBuilder;

// Arguments passed to the op mapping functions when translating a TFLite node
// into an NNAPI operation.
struct NNAPIOpMappingArgs {
  TfLiteContext* context;
  NNAPIOpBuilder* builder;
  TfLiteNode* node;
  int node_index;
  std::vector<int>* model_state_outputs;
  std::vector<int>* model_state_tfl_inputs;
  std::vector<std::tuple<int, int>>* feedback_loops;
  int* nnapi_errno;
};

// RAII NN API Model Destructor for use with std::unique_ptr
class NNFreeModel {
 public:
  explicit NNFreeModel(const NnApi* nnapi) : nnapi_(nnapi) {}
  void operator()(ANeuralNetworksModel* model) {
    nnapi_->ANeuralNetworksModel_free(model);
  }

 private:
  // NnApi instance to use. Not owned by this object.
  const NnApi* nnapi_;
};
// RAII NN API Compilation Destructor for use with std::unique_ptr
class NNFreeCompilation {
 public:
  explicit NNFreeCompilation(const NnApi* nnapi) : nnapi_(nnapi) {}
  void operator()(ANeuralNetworksCompilation* compilation) {
    nnapi_->ANeuralNetworksCompilation_free(compilation);
  }

 private:
  // NnApi instance to use. Not owned by this object.
  const NnApi* nnapi_;
};
// RAII NN API Execution Destructor for use with std::unique_ptr
class NNFreeExecution {
 public:
  explicit NNFreeExecution(const NnApi* nnapi) : nnapi_(nnapi) {}
  void operator()(ANeuralNetworksExecution* execution) {
    nnapi_->ANeuralNetworksExecution_free(execution);
  }

 private:
  // NnApi instance to use. Not owned by this object.
  const NnApi* nnapi_;
};
// RAII NN API Burst Destructor for use with std::unique_ptr
class NNFreeBurst {
 public:
  explicit NNFreeBurst(const NnApi* nnapi) : nnapi_(nnapi) {}
  void operator()(ANeuralNetworksBurst* burst) {
    nnapi_->ANeuralNetworksBurst_free(burst);
  }

 private:
  // NnApi instance to use. Not owned by this object.
  const NnApi* nnapi_;
};

// Manages an NNAPI shared memory handle.
class NNMemory {
 public:
  NNMemory(const NnApi* nnapi, const char* name, size_t size);

  ~NNMemory();

  ANeuralNetworksMemory* get_handle() { return nn_memory_handle_; }
  uint8_t* get_data_ptr() { return data_ptr_; }
  size_t get_byte_size() { return byte_size_; }

 private:
  // NnApi instance to use. Not owned by this object.
  const NnApi* nnapi_;
  int fd_ = 0;
  size_t byte_size_ = 0;
  uint8_t* data_ptr_ = nullptr;
  ANeuralNetworksMemory* nn_memory_handle_ = nullptr;
#ifndef __ANDROID__
  std::string shm_region_name_;
#endif
};
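// A hedged sketch of how the RAII deleters and NNMemory above are typically
// combined (illustrative comment only; the buffer name and size are
// hypothetical):
//
//   const NnApi* nnapi = NnApiImplementation();
//   ANeuralNetworksModel* model = nullptr;
//   nnapi->ANeuralNetworksModel_create(&model);
//   // The model is freed automatically when the unique_ptr goes out of scope.
//   std::unique_ptr<ANeuralNetworksModel, NNFreeModel> nn_model(
//       model, NNFreeModel(nnapi));
//   // Shared memory region used to stage input data for an execution.
//   NNMemory input_memory(nnapi, "input_pool", /*size=*/4096);
//   uint8_t* data = input_memory.get_data_ptr();
//   ANeuralNetworksMemory* handle = input_memory.get_handle();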
// LINT.IfChange
enum class NNAPIValidationFailureType : int {
  // The operator is not supported by either NNAPI or the NNAPI Delegate.
  kUnsupportedOperator = 0,
  // The given operation or operands are not supported on the specified
  // Android SDK version. The min supported version is specified in the
  // validation failure message.
  kUnsupportedAndroidVersion = 1,
  // The version of the operator (value of TfLiteRegistration::version)
  // for the given op is not supported. The max supported version
  // is specified in the validation failure message.
  // For more details on each operator version see
  // the GetBuiltinOperatorVersion function in
  // third_party/tensorflow/lite/tools/versioning/op_version.cc.
  kUnsupportedOperatorVersion = 2,
  // The given input operand type is not supported for the current combination
  // of operator type and sdk version.
  kUnsupportedInputType = 3,
  // When using NN API version 1.0 or 1.1, the condition
  //   input_scale * filter_scale < output_scale
  // must be true for quantized versions of the following ops:
  // * CONV_2D
  // * DEPTHWISE_CONV_2D
  // * FULLY_CONNECTED (where filter actually stands for weights)
  // The condition is relaxed and no longer required since version 1.2.
  kNotRestrictedScaleCompliant = 4,
  // The given output operand type is not supported for the current combination
  // of operator type and sdk version.
  kUnsupportedOutputType = 5,
  // The size of the operand tensor is too large.
  kUnsupportedOperandSize = 6,
  // The value of one of the operands or of a combination of operands is
  // not supported. Details are provided in the failure message.
  kUnsupportedOperandValue = 7,
  // The combination of float inputs and quantized weights or filters
  // is not supported.
  kUnsupportedHybridOperator = 8,
  // The quantization type (for example per-channel quantization) is not
  // supported.
  kUnsupportedQuantizationType = 9,
  // The accelerated version of the operation requires a specific operand to be
  // specified.
  kMissingRequiredOperand = 10,
  // The rank of the operand is not supported. Details in the failure message.
  kUnsupportedOperandRank = 11,
  // The input tensor cannot be dynamically-sized.
  kInputTensorShouldHaveConstantShape = 12,
  // The operator has a different number of inputs from the one or ones
  // supported by NNAPI.
  kUnsupportedOperatorVariant = 13,
  // The accelerated version of the operator cannot specify an activation
  // function.
  kNoActivationExpected = 14,
  // Quantization scale and/or zero point are not in the supported value(s)
  // for the accelerated operation.
  kUnsupportedQuantizationParameters = 15,
};
// LINT.ThenChange(nnapi_linter/linter.proto)

struct NNAPIValidationFailure {
  NNAPIValidationFailureType type;
  std::string message;

  NNAPIValidationFailure(NNAPIValidationFailureType type, const char* message)
      : type(type), message(message) {}
};
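// A hedged sketch of how validation failures can be collected and reported
// (illustrative comment only; `context`, `node`, `registration`, and
// `android_sdk_version` are assumed to come from the delegate's partitioning
// logic):
//
//   std::vector<NNAPIValidationFailure> failures;
//   const bool supported = NNAPIDelegateKernel::Validate(
//       context, registration->builtin_code, registration->version,
//       android_sdk_version, node, /*is_accelerator_specified=*/true,
//       &failures);
//   if (!supported) {
//     for (const auto& failure : failures) {
//       // e.g. log failure.type and failure.message to explain why the node
//       // was rejected.
//     }
//   }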
// The kernel that represents the node subset of the TF Lite graph being run
// on NN API.
class NNAPIDelegateKernel {
 public:
  explicit NNAPIDelegateKernel(const NnApi* nnapi)
      : initialised_(false),
        nnapi_(nnapi),
        nn_model_(nullptr, NNFreeModel(nnapi_)),
        nn_compilation_(nullptr, NNFreeCompilation(nnapi_)),
        nn_burst_(nullptr, NNFreeBurst(nnapi_)),
        nn_execution_(nullptr, NNFreeExecution(nnapi_)) {}
  NNAPIDelegateKernel() : NNAPIDelegateKernel(NnApiImplementation()) {}
  ~NNAPIDelegateKernel() {
    for (auto content : allocation_memory_mapping_) {
      nnapi_->ANeuralNetworksMemory_free(content.second);
    }
  }

  // Translates a node into its operands.
  // It assumes that the call to Validate has been successful for
  // the operation.
  // In case of success it returns kTfLiteOk and stores in nn_op_type the
  // NNAPI Operation code.
  // Returns kTfLiteError in case of failures during mapping.
  static TfLiteStatus Map(TfLiteContext* context, int builtin_code,
                          int version, int android_sdk_version,
                          const NNAPIOpMappingArgs& mapping_args,
                          ANeuralNetworksOperationType* nn_op_type);

  // Returns true if the node can be accelerated with NNAPI.
  static bool Validate(
      const TfLiteContext* context, int builtin_code, int version,
      int android_sdk_version, const TfLiteNode* node,
      bool is_accelerator_specified,
      // Collects the list of failures found while validating
      // whether the given node can be accelerated.
      std::vector<NNAPIValidationFailure>* map_failures = nullptr);

  // Initializes the kernel and builds the NN model.
  // Any NNAPI-related error causing this method to fail will have the
  // associated error number stored in nnapi_errno.
  TfLiteStatus Init(TfLiteContext* context, const TfLiteDelegateParams* params,
                    int* nnapi_errno);

  // Creates the NNAPI Compilation for the NN model. It assumes that Init has
  // been called and completed successfully.
  // Any NNAPI-related error causing this method to fail will have the
  // associated error number stored in nnapi_errno.
  TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node,
                       int* nnapi_errno);

  // Invokes the NN model. Expects Init and Prepare to have been completed
  // successfully.
  // Any NNAPI-related error causing this method to fail will have the
  // associated error number stored in nnapi_errno.
  TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node,
                      int* nnapi_errno);

  // Returns the list of operations supported by the current NNAPI model as
  // built in Prepare. Every operation is identified by the index as provided
  // in the delegate parameters given to the delegate during the Init call.
  // It expects the Init method to have been called and completed successfully
  // and returns kTfLiteError if not. Returns an error if any of the NNAPI
  // operations fails or if the
  // ANeuralNetworksModel_getSupportedOperationsForDevices function is not
  // available in the NnApi object.
  TfLiteStatus GetOperationsSupportedByTargetNnApiDevices(
      TfLiteContext* context, std::vector<int>* supported_nodes,
      int* nnapi_errno);

 private:
  // True if initialization has been completed successfully.
  bool initialised_;
  // Access to NNApi.
  const NnApi* nnapi_;
  // ANN device handles.
  std::vector<ANeuralNetworksDevice*> nnapi_devices_;
  // Name of the nnapi device, empty if nnapi_devices_ is empty.
  std::string device_name_;
  // ANN API state.
  std::unique_ptr<ANeuralNetworksModel, NNFreeModel> nn_model_;
  std::unique_ptr<ANeuralNetworksCompilation, NNFreeCompilation>
      nn_compilation_;
  std::unique_ptr<ANeuralNetworksBurst, NNFreeBurst> nn_burst_;
  std::unique_ptr<ANeuralNetworksExecution, NNFreeExecution> nn_execution_;
  // The mapping of tensor id to BufferHandle. Needed to track BufferHandle
  // changes and alter nn_execution_ if necessary.
  std::vector<int> tensor_handle_map_;
  // Node indices that this delegate is responsible for. Indices here
  // index into the nodes array in the TfLiteContext.
  std::vector<int> nodes_;
  // Tracks the operand indices we use.
  OperandMapping operand_mapping_;
  std::map<const MMAPAllocation*, ANeuralNetworksMemory*>
      allocation_memory_mapping_;
  // Tracks the memory registrations of the delegate.
  const std::vector<StatefulNnApiDelegate::MemoryRegistration>*
      tensor_memory_map_;
  std::vector<int> model_state_outputs_;
  std::vector<int> model_state_tfl_inputs_;
  // This is the equivalent of the pair model_state_outputs_,
  // model_state_tfl_inputs_ for all tensors where we have to keep the output
  // data available for TFLite model users.
  std::vector<std::tuple<int, int>> feedback_loops_;

  std::unique_ptr<NNMemory> nn_input_memory_;
  std::unique_ptr<NNMemory> nn_output_memory_;

  std::vector<uint8_t> nn_compilation_cache_token_;

  std::vector<int> nnapi_to_tflite_op_mapping_;
  // Map of DENSIFY output tensor id to node id.
  std::vector<int> densify_output_to_node_mapping_;
  // Map of DEQUANTIZE output tensor id to node id.
  // Only contains DEQUANTIZE nodes with non-const input.
  std::vector<int> non_const_dequantize_output_to_node_mapping_;

  // Fully initialized in NNAPIDelegateKernel::AddOpsAndTensors.
  int target_feature_level_ = 27;  // kMinSdkVersionForNNAPI

  void AddDequantizeOperatorsWhereNeeded(
      const TfLiteContext* context, int builtin_code, const TfLiteNode* node,
      int tflite_node_index, NNAPIOpBuilder* builder, int* nnapi_errno);

  TfLiteStatus DensifyAndDequantizeConstTensor(TfLiteContext* context,
                                               int densify_node_id,
                                               bool should_dequantize,
                                               NNAPIOpBuilder& builder);

  TfLiteStatus AddOpsAndTensors(TfLiteContext* context, int* nnapi_errno,
                                bool allow_dynamic_dimensions);

  TfLiteStatus BuildGraph(TfLiteContext* context,
                          const StatefulNnApiDelegate::Options& options,
                          const TfLiteIntArray* input_tensors,
                          const TfLiteIntArray* output_tensors,
                          int* nnapi_errno);
};
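// A hedged sketch of the kernel's lifecycle inside the delegate (illustrative
// comment only; `context`, `params`, and `node` are assumed to come from the
// usual TfLiteDelegate kernel callbacks):
//
//   int nnapi_errno = 0;
//   auto kernel = std::make_unique<NNAPIDelegateKernel>();
//   // Init: translates the partition described by `params` into an NN model.
//   if (kernel->Init(context, params, &nnapi_errno) != kTfLiteOk) { /*...*/ }
//   // Prepare: creates the NNAPI compilation for that model.
//   if (kernel->Prepare(context, node, &nnapi_errno) != kTfLiteOk) { /*...*/ }
//   // Invoke: runs inference, reporting NNAPI errors through nnapi_errno.
//   if (kernel->Invoke(context, node, &nnapi_errno) != kTfLiteOk) { /*...*/ }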
}  // namespace nnapi
}  // namespace delegate
}  // namespace tflite

#endif  // TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_KERNEL_H_