1 /**
2 * Copyright 2023 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "plugin/device/ascend/hal/device/launch_transdata.h"
18
19 #include <algorithm>
20
21 #include "abstract/utils.h"
22 #include "backend/common/session/single_kernel_graph.h"
23 #include "include/backend/anf_runtime_algorithm.h"
24 #include "include/common/utils/anfalgo.h"
25 #include "runtime/device/memory_manager.h"
26 #include "plugin/device/ascend/hal/device/ascend_memory_pool.h"
27 #include "plugin/device/ascend/hal/device/ascend_stream_manager.h"
28 #include "plugin/device/ascend/kernel/acl/acl_kernel_build.h"
29 #include "acl/acl_rt.h"
30 #include "ops/array_op_name.h"
31
32 namespace mindspore::device::ascend {
GetKernelOutputAddr()33 std::vector<uint8_t *> LaunchTransData::GetKernelOutputAddr() { return outputs_addr_; }
34
SetInputAddr(void * input_addr)35 void LaunchTransData::SetInputAddr(void *input_addr) { input_addr_ = input_addr; }
36
FreeDeviceMem()37 void LaunchTransData::FreeDeviceMem() {
38 input_addr_ = nullptr;
39 for (size_t i = 0; i < outputs_addr_.size(); ++i) {
40 if (outputs_addr_[i] != nullptr) {
41 AscendMemoryPool::GetInstance().FreeTensorMem(outputs_addr_[i]);
42 outputs_addr_[i] = nullptr;
43 }
44 }
45 outputs_addr_.clear();
46 }
47
SetKernelBuildInfo()48 void LaunchTransData::SetKernelBuildInfo() {
49 if (!kernel_graph_->execution_order().empty()) {
50 auto new_op = kernel_graph_->execution_order()[0];
51 std::vector<TypeId> device_type = {dtype_};
52 auto input_format = (src_format_ == kOpFormat_NCHW) ? kOpFormat_DEFAULT : src_format_;
53 auto output_format = (dst_format_ == kOpFormat_NCHW) ? kOpFormat_DEFAULT : dst_format_;
54 std::vector<std::string> inputs_format = {input_format};
55 std::vector<std::string> outputs_format = {output_format};
56 std::vector<kernel::KernelObjectType> input_object_types = {kernel::KernelObjectType::TENSOR};
57 std::vector<kernel::KernelObjectType> output_object_types{kernel::KernelObjectType::TENSOR};
58 // set build info
59 auto builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>();
60 builder->SetKernelType(KernelType::ACL_KERNEL);
61 builder->SetInputsDeviceType(device_type);
62 builder->SetOutputsDeviceType(device_type);
63 builder->SetInputsFormat(inputs_format);
64 builder->SetOutputsFormat(outputs_format);
65 builder->SetInputsKernelObjectType(input_object_types);
66 builder->SetOutputsKernelObjectType(output_object_types);
67 builder->SetInputsReshapeType({});
68 builder->SetOutputsReshapeType({});
69 AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), new_op.get());
70 // set attr
71 bool in_def_flag = IsOneOfDefaultFormat(input_format);
72 bool out_def_flag = IsOneOfDefaultFormat(output_format);
73 common::AnfAlgo::SetNodeAttr(kAttrInputDefaultFormat, MakeValue(in_def_flag), new_op);
74 common::AnfAlgo::SetNodeAttr(kAttrOutputDefaultFormat, MakeValue(out_def_flag), new_op);
75 common::AnfAlgo::SetNodeAttr(kAttrSrcFormat, MakeValue(src_format_), new_op);
76 common::AnfAlgo::SetNodeAttr(kAttrDstFormat, MakeValue(dst_format_), new_op);
77 common::AnfAlgo::SetNodeAttr(kAttrGroups, MakeValue(groups_), new_op);
78 common::AnfAlgo::SetNodeAttr(kAttrFracZGroup, MakeValue(groups_), new_op);
79 }
80 }
81
ConstructKernelGraph()82 void LaunchTransData::ConstructKernelGraph() {
83 std::vector<TypeId> input_dtypes = {dtype_};
84 std::vector<TypeId> output_dtypes = {dtype_};
85 // obtain input & output shape
86 std::vector<ShapeVector> input_shapes = {{shape_}};
87 std::vector<ShapeVector> output_shapes = {{shape_}};
88 kernel_graph_ = session::SingleKernelGraph::ConstructKernelGraphBasedOnSingleOp(
89 kIdentityOpName, input_dtypes, input_shapes, output_dtypes, output_shapes);
90 MS_EXCEPTION_IF_NULL(kernel_graph_);
91 }
92
AllocDeviceMem(size_t size)93 uint8_t *LaunchTransData::AllocDeviceMem(size_t size) {
94 auto device_memory = AscendMemoryPool::GetInstance().AllocTensorMem(size, false, stream_id_);
95 if (device_memory == nullptr) {
96 MS_LOG(EXCEPTION) << "Fail to alloc memory, size: " << size << "B.";
97 }
98 return static_cast<uint8_t *>(device_memory);
99 }
100
CreateOutputAddr(const std::vector<size_t> & outputs_list,std::vector<kernel::KernelTensorPtr> * kernel_tensors)101 void LaunchTransData::CreateOutputAddr(const std::vector<size_t> &outputs_list,
102 std::vector<kernel::KernelTensorPtr> *kernel_tensors) {
103 MS_EXCEPTION_IF_NULL(kernel_tensors);
104 // init output_addr_
105 outputs_addr_ = std::vector<uint8_t *>(outputs_list.size(), nullptr);
106 if (outputs_addr_.size() < outputs_list.size()) {
107 MS_LOG_EXCEPTION << "Error addr size!";
108 }
109 kernel_tensors->clear();
110 for (size_t i = 0; i < outputs_list.size(); ++i) {
111 auto size = MemoryManager::GetCommonAlignSize(outputs_list[i]);
112 outputs_addr_[i] = AllocDeviceMem(size);
113 auto kernel_tensor = std::make_shared<kernel::KernelTensor>(
114 outputs_addr_[i], size, kernel::GetFormatFromStrToEnum(dst_format_), dtype_, shape_, kAscendDevice, 0);
115 kernel_tensors->emplace_back(kernel_tensor);
116 }
117 }
118
AclKernelBuild()119 void LaunchTransData::AclKernelBuild() {
120 auto kernel = kernel_graph_->execution_order()[0];
121 kernel_mod_ = kernel::AclOpBuild(kernel);
122 MS_EXCEPTION_IF_NULL(kernel_mod_);
123 AnfAlgo::SetKernelMod(kernel_mod_, kernel.get());
124 }
125
LaunchOpKernel()126 void LaunchTransData::LaunchOpKernel() {
127 // construct graph
128 if (kernel_graph_ == nullptr) {
129 ConstructKernelGraph();
130 }
131 SetKernelBuildInfo();
132 AclKernelBuild();
133
134 // inputs
135 std::vector<kernel::KernelTensor *> kernel_inputs;
136 auto input = std::make_shared<kernel::KernelTensor>(
137 input_addr_, total_size_, kernel::GetFormatFromStrToEnum(src_format_), dtype_, shape_, kAscendDevice, 0);
138 kernel_inputs.push_back(input.get());
139
140 // outputs
141 std::vector<kernel::KernelTensor *> kernel_outputs;
142 std::vector<kernel::KernelTensorPtr> output_tensors;
143 CreateOutputAddr(kernel_mod_->GetOutputSizeList(), &output_tensors);
144 (void)std::transform(output_tensors.begin(), output_tensors.end(), std::back_inserter(kernel_outputs),
145 [](kernel::KernelTensorPtr &tensor) { return tensor.get(); });
146
147 // workspaces
148 std::vector<kernel::KernelTensor *> kernel_workspace;
149 const auto stream = AscendStreamMng::GetInstance().GetStream(stream_id_);
150
151 // launch
152 auto ret_status = kernel_mod_->Launch(kernel_inputs, kernel_workspace, kernel_outputs, stream);
153 if (!ret_status) {
154 MS_LOG(EXCEPTION) << "Launch transdata single kernel failed";
155 }
156 }
157 } // namespace mindspore::device::ascend
158