/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <executorch/backends/xnnpack/runtime/XNNExecutor.h>

#include <algorithm> // std::sort
#include <cinttypes> // PRIu32

namespace executorch {
namespace backends {
namespace xnnpack {
namespace delegate {

using executorch::aten::ScalarType;
using executorch::aten::SizesType;
using executorch::aten::Tensor;
using executorch::runtime::BackendExecutionContext;
using executorch::runtime::Error;
using executorch::runtime::EValue;
using executorch::runtime::is_contiguous_dim_order;
using executorch::runtime::kTensorDimensionLimit;

/**
 * Initializes the XNNExecutor with the runtime and the given lists of
 * input/output ids. externals_ is resized to the total number of inputs
 * and outputs.
 */
ET_NODISCARD Error XNNExecutor::initialize(
    xnn_runtime_t runtime,
    std::vector<uint32_t>&& input_ids,
    std::vector<uint32_t>&& output_ids) {
  runtime_ = std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)>(
      runtime, xnn_delete_runtime);

  auto error = profiler_.initialize(runtime);
  if (error != Error::Ok) {
    ET_LOG(
        Error,
        "Failed to start profiling: %u.",
        static_cast<uint32_t>(error));
  }

  // Initialize the external values for inputs and outputs,
  // mapping the ExecuTorch arg index to external IDs
  input_ids_ = std::move(input_ids);
  std::sort(input_ids_.begin(), input_ids_.end());

  output_ids_ = std::move(output_ids);
  std::sort(output_ids_.begin(), output_ids_.end());

  externals_.resize(input_ids_.size() + output_ids_.size());

  return Error::Ok;
}

/**
 * Prepares the args for the XNNPACK Runtime.
 *
 * Creates an array of xnn_external_values from the EValues passed in.
 * Reshapes all the external input tensors, in case any input shapes have
 * changed, then reshapes the entire runtime, propagating the new shape
 * information through it.
 *
 * Note: the external ids given to the external tensors in the XNNPACK
 * runtime correspond to their index in the list of args passed into
 * delegate->execute().
 */
ET_NODISCARD Error XNNExecutor::prepare_args(EValue** args) {
  // Create xnn_external_values from the EValue args
  xnn_status status;
  for (uint32_t i = 0; i < externals_.size(); ++i) {
    if (i < input_ids_.size()) {
      externals_[i].id = input_ids_[i];
    } else {
      externals_[i].id = output_ids_[i - input_ids_.size()];
    }
    uint32_t ext_id = externals_[i].id;

    ET_CHECK_OR_RETURN_ERROR(
        args[ext_id]->isTensor(),
        InvalidArgument,
        "Expected argument to delegate at index %u to be a Tensor, but got %" PRIu32,
        i,
        static_cast<uint32_t>(args[ext_id]->tag));

    Tensor* tensor = &args[ext_id]->toTensor();
    externals_[i].data = tensor->mutable_data_ptr();

    // Reshape runtime inputs
    if (i < input_ids_.size()) {
      size_t num_dims = tensor->dim();
      ET_CHECK_OR_RETURN_ERROR(
          is_contiguous_dim_order(tensor->dim_order().data(), tensor->dim()),
          Internal,
          "Expecting default dim_order but got a non default dim_order tensor for external input %u",
          i);
      size_t dims[XNN_MAX_TENSOR_DIMS];
      ET_CHECK_OR_RETURN_ERROR(
          num_dims <= XNN_MAX_TENSOR_DIMS,
          InvalidArgument,
          "XNNPACK backend accepts tensors with at most %d dims, but got %zu",
          XNN_MAX_TENSOR_DIMS,
          num_dims);
      for (size_t d = 0; d < num_dims; ++d) {
        dims[d] = tensor->size(d);
      }
      status =
          xnn_reshape_external_value(runtime_.get(), ext_id, num_dims, dims);
      ET_CHECK_OR_RETURN_ERROR(
          status == xnn_status_success,
          Internal,
          "Internal Error: Reshape Input Tensor Failed with code: %s",
          xnn_status_to_string(status));
    }
  }

  // Propagate the input shapes through the runtime and update its memory plan
  status = xnn_reshape_runtime(runtime_.get());
  ET_CHECK_OR_RETURN_ERROR(
      status == xnn_status_success,
      Internal,
      "Internal Error: Propagating input shapes failed with code: %s",
      xnn_status_to_string(status));

  return Error::Ok;
}
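/*
 * Example (illustrative sketch; the concrete ids below are assumptions, not
 * taken from this file): for a delegate called with three args where
 * input_ids_ = {0, 1} and output_ids_ = {2}, prepare_args(args) fills
 * externals_ as
 *
 *   externals_[0] = { .id = 0, .data = args[0]->toTensor().mutable_data_ptr() }
 *   externals_[1] = { .id = 1, .data = args[1]->toTensor().mutable_data_ptr() }
 *   externals_[2] = { .id = 2, .data = args[2]->toTensor().mutable_data_ptr() }
 *
 * i.e. inputs occupy the front of externals_, outputs follow, and each
 * external id doubles as an index into the args array. Only the inputs are
 * reshaped individually before xnn_reshape_runtime() propagates the shapes
 * through the whole graph.
 */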
/**
 * Runs the XNNPACK Runtime.
 *
 * We first set up the runtime by feeding externals_ to runtime setup,
 * after which we execute the runtime via xnn_invoke_runtime.
 */
ET_NODISCARD Error XNNExecutor::forward(BackendExecutionContext& context) {
  ET_CHECK_OR_RETURN_ERROR(
      runtime_ != nullptr,
      Internal,
      "XNNPACK Delegate did not compile correctly");

  xnn_status status = xnn_setup_runtime_v2(
      runtime_.get(), externals_.size(), externals_.data());

  ET_CHECK_OR_RETURN_ERROR(
      status == xnn_status_success,
      Internal,
      "Internal Error: Setting up the runtime failed with code: %s",
      xnn_status_to_string(status));

  auto error = profiler_.start(context.event_tracer());
  if (error != Error::Ok) {
    ET_LOG(
        Error,
        "Failed to start profiling: %u.",
        static_cast<uint32_t>(error));
  }

  status = xnn_invoke_runtime(runtime_.get());

  error = profiler_.end();
  if (error != Error::Ok) {
    ET_LOG(
        Error,
        "Failed to end profiling: %u.",
        static_cast<uint32_t>(error));
  }

  ET_CHECK_OR_RETURN_ERROR(
      status == xnn_status_success,
      Internal,
      "XNN Runtime invoke failed with code: %s",
      xnn_status_to_string(status));

  return Error::Ok;
}

/**
 * Prepares the outputs for ExecuTorch.
 *
 * Resizes the output tensors based on the output shapes returned by
 * the XNNPACK runtime.
 *
 * Note: For arg_max pooling, we recast the output index tensor. Since
 * XNNPACK gives us the index tensor as int32, we need to convert it
 * back to int64 for ExecuTorch.
 */
ET_NODISCARD Error XNNExecutor::resize_outputs(EValue** args) const {
  size_t output_idx_start = input_ids_.size();
  for (size_t i = output_idx_start; i < externals_.size(); ++i) {
    uint32_t ext_id = externals_[i].id;
    Tensor* out_tensor = &args[ext_id]->toTensor();

    size_t num_dim;
    size_t dims[XNN_MAX_TENSOR_DIMS];

    // Fetch the updated output shapes from the XNNPACK runtime
    xnn_status status =
        xnn_get_external_value_shape(runtime_.get(), ext_id, &num_dim, dims);

    ET_CHECK_OR_RETURN_ERROR(
        status == xnn_status_success,
        Internal,
        "Internal Error: Failed to retrieve graph output shapes");

    // Convert the new output shape into SizesType
    SizesType expected_output_size[kTensorDimensionLimit];
    for (size_t d = 0; d < num_dim; ++d) {
      expected_output_size[d] = static_cast<SizesType>(dims[d]);
    }

    executorch::aten::ArrayRef<SizesType> output_size{
        expected_output_size, static_cast<size_t>(num_dim)};

    ET_LOG(Debug, "Resizing output tensor to a new shape");
    Error err = resize_tensor(*out_tensor, output_size);
    if (err != Error::Ok) {
      ET_LOG(Error, "Failed to resize output tensor for XNNExecutor");
      return err;
    }

    // The output dtype is int64, but XNNPACK doesn't support int64. The
    // data was written into this tensor by XNNPACK as int32, so widen it
    // to int64 in place. Copy from the last element down so that unread
    // int32 values are not overwritten by the wider int64 writes.
    if (out_tensor->scalar_type() == ScalarType::Long) {
      int64_t* data_64 = out_tensor->mutable_data_ptr<int64_t>();
      const int32_t* data_32 = out_tensor->const_data_ptr<int32_t>();
      // Note: the loop index must be signed; with an unsigned index,
      // `j >= 0` is always true and the loop would never terminate.
      for (int64_t j = out_tensor->numel() - 1; j >= 0; --j) {
        data_64[j] = data_32[j];
      }
    }
  }

  return Error::Ok;
}

} // namespace delegate
} // namespace xnnpack
} // namespace backends
} // namespace executorch
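/*
 * Usage sketch (illustrative only; `XnnpackBackend` and the surrounding
 * signature are assumptions about the calling backend, not part of this
 * file): a backend's execute() is expected to drive the executor through
 * the three methods above, in this order.
 *
 *   Error XnnpackBackend::execute(
 *       BackendExecutionContext& context,
 *       DelegateHandle* handle,
 *       EValue** args) const {
 *     auto* executor = static_cast<delegate::XNNExecutor*>(handle);
 *     Error err = executor->prepare_args(args); // bind args, reshape inputs
 *     if (err != Error::Ok) {
 *       return err;
 *     }
 *     err = executor->forward(context); // set up and invoke the runtime
 *     if (err != Error::Ok) {
 *       return err;
 *     }
 *     return executor->resize_outputs(args); // publish new output shapes
 *   }
 */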