/* * Copyright (c) Qualcomm Innovation Center, Inc. * All rights reserved. * * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ #pragma once #include #include #include #include #include #include #include #include #include #include #include #include namespace py = pybind11; namespace executorch { namespace backends { namespace qnn { class PyQnnManager { public: // used for AoT compilation explicit PyQnnManager(const py::bytes& buffer) : qnn_executorch_option_ptr_(buffer), qnn_executorch_context_binary_(QNN_EXECUTORCH_CONTEXT_BINARY) { // Choose non-allocating non-owning string pieces exposed as string_view for // parsers auto qnn_executorch_options = GetQnnExecuTorchOptions( qnn_executorch_option_ptr_.cast().data()); qnn_manager_ = std::make_shared( qnn_executorch_options, qnn_executorch_context_binary_); } // used for loading context binary directly explicit PyQnnManager(const py::bytes& buffer, const py::bytes& ctx_bin) : qnn_executorch_option_ptr_(buffer) { auto qnn_executorch_options = GetQnnExecuTorchOptions( qnn_executorch_option_ptr_.cast().data()); py::buffer_info info(py::buffer(ctx_bin).request()); qnn_executorch_context_binary_.buffer = info.ptr; qnn_executorch_context_binary_.nbytes = info.size * info.itemsize; qnn_manager_ = std::make_shared( qnn_executorch_options, qnn_executorch_context_binary_); } // used for loading multiple graphs in qcir explicit PyQnnManager(const py::bytes& buffer, const py::list& qcirs) : qnn_executorch_option_ptr_(buffer) { auto qnn_executorch_options = GetQnnExecuTorchOptions( qnn_executorch_option_ptr_.cast().data()); // merge multiple qcirs into one context with multiple graphs std::vector> graphs; for (size_t i = 0; i < qcirs.size(); ++i) { py::buffer_info info(py::buffer(qcirs[i].cast()).request()); flatbuffers::Verifier verifier_binary_info( static_cast(info.ptr), info.size * info.itemsize); if (!qnn_delegate::VerifyBinaryInfoBuffer(verifier_binary_info)) { QNN_EXECUTORCH_LOG_ERROR("Fail to verify binary info"); return; } auto binary_info = qnn_delegate::GetBinaryInfo(info.ptr); flatbuffers::Verifier verifier_qcir( binary_info->data()->data(), binary_info->data()->size()); if (!qcir::VerifyContextBuffer(verifier_qcir)) { QNN_EXECUTORCH_LOG_ERROR("Fail to verify qcir format"); return; } auto context = qcir::GetContext(binary_info->data()->data()); for (const auto& graph : *context->graphs()) { std::vector> tensors; for (const auto tensor : *graph->tensors()) { // here we need to take a detour to merge multiple qcir flatbuffers // outer ToTensor // return: flatbuffers::Offset // consume: QnnTensor, flatbuffers::FlatBufferBuilder* // inner ToTensor // return: QnnTensor // consume: flatbuffers::Vector<::flatbuffers::Offset> tensors.emplace_back(ToTensor(ToTensor(tensor), &builder_)); } std::vector> nodes; for (const auto& node : *graph->nodes()) { int32_t* inputs_ptr = const_cast(node->inputs()->data()); int32_t* outputs_ptr = const_cast(node->outputs()->data()); int32_t* params_ptr = const_cast(node->params()->data()); std::vector inputs( inputs_ptr, inputs_ptr + node->inputs()->size()); std::vector outputs( outputs_ptr, outputs_ptr + node->outputs()->size()); std::vector params( params_ptr, params_ptr + node->params()->size()); nodes.emplace_back(qcir::CreateOperatorDirect( builder_, node->name()->str().c_str(), node->package_name()->str().c_str(), node->type_name()->str().c_str(), &inputs, &outputs, ¶ms)); } graphs.emplace_back(qcir::CreateGraphDirect( builder_, graph->name()->str().c_str(), &nodes, &tensors)); } } auto context = qcir::CreateContextDirect(builder_, &graphs); builder_.Finish(context); QnnExecuTorchContextBinary qcir_bin( {builder_.GetBufferPointer(), builder_.GetSize()}); qnn_executorch_context_binary_ = MakeBinaryInfo(qcir_bin); qnn_manager_ = std::make_shared( qnn_executorch_options, qnn_executorch_context_binary_); } executorch::runtime::Error Init() { return qnn_manager_->Init(); } bool IsNodeSupportedByBackend( std::vector>& op_wrappers) { return qnn_manager_->IsNodeSupportedByBackend(op_wrappers); } // this method is specific for compiling multi-graphs py::array_t Compile() { if (qnn_manager_->CompileQcir() != Error::Ok) { QNN_EXECUTORCH_LOG_ERROR("Fail to compile qcir"); return py::array_t(0); } // generate context binary if compilation succeded QnnExecuTorchContextBinary binary_info; qnn_manager_->GetContextBinary(binary_info); // allocate py::array (to pass the result of the C++ function to Python) auto result = py::array_t(binary_info.nbytes); auto result_buffer = result.request(); char* result_ptr = (char*)result_buffer.ptr; std::memcpy(result_ptr, binary_info.buffer, binary_info.nbytes); return result; } py::array_t Compile( const std::string& graph_name, std::vector>& op_wrappers) { QnnExecuTorchContextBinary binary_info; if (qnn_manager_->IsOnlinePrepare() || qnn_manager_->IsMultipleGraphs()) { builder_.Reset(); std::vector> tensors; std::unordered_map tensor_map; auto set_tensor = [&](const std::shared_ptr& wrapper, std::vector& index) { auto it = tensor_map.find(wrapper.get()); if (it != tensor_map.end()) { index.push_back(it->second); } else { int i = tensors.size(); tensor_map[wrapper.get()] = i; index.push_back(i); tensors.emplace_back( ToTensor(wrapper->CloneTensorStruct(), &builder_)); } }; std::vector> operators; for (std::shared_ptr& op_wrapper : op_wrappers) { std::vector inputs, outputs, params; for (const auto& tensor_wrapper : op_wrapper->GetInputTensors()) { set_tensor(tensor_wrapper, inputs); } for (const auto& tensor_wrapper : op_wrapper->GetOutputTensors()) { set_tensor(tensor_wrapper, outputs); } for (const auto& param : op_wrapper->GetParams()) { auto* p_tensor_param = dynamic_cast(param.get()); if (p_tensor_param != nullptr) { auto wrapper = p_tensor_param->GetTensorWrapper(); wrapper->SetName(param->GetName()); set_tensor(wrapper, params); } else { executorch::runtime::Error err = param->PopulateQnnParam(); if (err != executorch::runtime::Error::Ok) { QNN_EXECUTORCH_LOG_ERROR( "Fail to get scalar parameter in online prepare stage"); return py::array_t(0); } Qnn_Param_t p = param->GetQnnParam(); Qnn_Tensor_t t = QNN_TENSOR_INIT; QNN_VER_PTR(t)->name = p.name; QNN_VER_PTR(t)->dataType = p.scalarParam.dataType; QNN_VER_PTR(t)->clientBuf.data = static_cast(&p.scalarParam.uint8Value); QNN_VER_PTR(t)->clientBuf.dataSize = GetDataTypeSize(QNN_VER_PTR(t)->dataType); params.push_back(tensors.size()); tensors.emplace_back(ToTensor(t, &builder_)); } } Qnn_OpConfig_t op_config = op_wrapper->GetOpConfig(); operators.emplace_back(qcir::CreateOperatorDirect( builder_, QNN_VER_PTR(op_config)->name, QNN_VER_PTR(op_config)->packageName, QNN_VER_PTR(op_config)->typeName, &inputs, &outputs, ¶ms)); } auto graph = qcir::CreateGraphDirect( builder_, graph_name.c_str(), &operators, &tensors); std::vector> graphs({graph}); auto context = qcir::CreateContextDirect(builder_, &graphs); builder_.Finish(context); QnnExecuTorchContextBinary qcir_binary( {builder_.GetBufferPointer(), builder_.GetSize()}); binary_info = MakeBinaryInfo(qcir_binary); } else { if (qnn_manager_->Compile(graph_name, op_wrappers) != executorch::runtime::Error::Ok) { QNN_EXECUTORCH_LOG_ERROR("Fail to compile QNN graph"); return py::array_t(0); } if (qnn_manager_->GetContextBinary(binary_info) != executorch::runtime::Error::Ok) { return py::array_t(0); } } // allocate py::array (to pass the result of the C++ function to Python) auto result = py::array_t(binary_info.nbytes); auto result_buffer = result.request(); char* result_ptr = (char*)result_buffer.ptr; std::memcpy(result_ptr, binary_info.buffer, binary_info.nbytes); return result; } void Destroy() { return qnn_manager_->Destroy(); } bool IsAvailable() { return qnn_manager_->IsAvailable(); } bool IsTensorDump() { return qnn_manager_->IsTensorDump(); } executorch::runtime::Error AllocateTensor(const std::string& graph_name) { return qnn_manager_->AllocateTensor(graph_name); } py::list GetGraphInputs(const std::string& graph_name) { py::list ret; for (const std::shared_ptr& input : qnn_manager_->GetGraphInputs(graph_name)) { ret.append(PyQnnTensorWrapper(input)); } return ret; } py::list GetGraphOutputs(const std::string& graph_name) { py::list ret; for (const std::shared_ptr& output : qnn_manager_->GetGraphOutputs(graph_name)) { ret.append(PyQnnTensorWrapper(output)); } return ret; } py::list GetGraphNames() { py::list ret; for (const std::string& graph_name : qnn_manager_->GetGraphNames()) { ret.append(graph_name); } return ret; } uint64_t GetSpillFillBufferSize() { return qnn_manager_->GetSpillFillBufferSize(); } py::array_t MakeBinaryInfo(const py::bytes& ctx_bin) { py::buffer_info info(py::buffer(ctx_bin).request()); QnnExecuTorchContextBinary binary( {info.ptr, static_cast(info.size * info.itemsize)}); auto binary_info = MakeBinaryInfo(binary); auto result = py::array_t(binary_info.nbytes); auto result_buffer = result.request(); std::memcpy(result_buffer.ptr, binary_info.buffer, binary_info.nbytes); return result; } private: QnnExecuTorchContextBinary MakeBinaryInfo( const QnnExecuTorchContextBinary& ctx_bin) { auto signature = []() { return std::to_string( std::chrono::high_resolution_clock::now().time_since_epoch().count()); }; const uint8_t* base = static_cast(ctx_bin.buffer); std::vector data(base, base + ctx_bin.nbytes); // add signature to binary for cache reuse in runtime builder_.Reset(); auto binary_info = qnn_delegate::CreateBinaryInfoDirect( builder_, signature().c_str(), &data); builder_.Finish(binary_info); return QnnExecuTorchContextBinary( {builder_.GetBufferPointer(), builder_.GetSize()}); } // Store the bytes object instead of a raw pointer so that this module will // keep the bytes alive. const py::bytes qnn_executorch_option_ptr_; QnnExecuTorchContextBinary qnn_executorch_context_binary_; std::shared_ptr qnn_manager_; flatbuffers::FlatBufferBuilder builder_; }; } // namespace qnn } // namespace backends } // namespace executorch