• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2023 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "plugin/device/ascend/hal/device/launch_transdata.h"
18 
19 #include <algorithm>
20 
21 #include "abstract/utils.h"
22 #include "backend/common/session/single_kernel_graph.h"
23 #include "include/backend/anf_runtime_algorithm.h"
24 #include "include/common/utils/anfalgo.h"
25 #include "runtime/device/memory_manager.h"
26 #include "plugin/device/ascend/hal/device/ascend_memory_pool.h"
27 #include "plugin/device/ascend/hal/device/ascend_stream_manager.h"
28 #include "plugin/device/ascend/kernel/acl/acl_kernel_build.h"
29 #include "acl/acl_rt.h"
30 #include "ops/array_op_name.h"
31 
32 namespace mindspore::device::ascend {
// Returns (by copy) the device addresses allocated for the kernel outputs;
// entries are populated by CreateOutputAddr and released by FreeDeviceMem.
std::vector<uint8_t *> LaunchTransData::GetKernelOutputAddr() { return outputs_addr_; }
34 
// Records the caller-owned device address used as the transdata input;
// ownership stays with the caller (FreeDeviceMem only forgets the pointer).
void LaunchTransData::SetInputAddr(void *input_addr) { input_addr_ = input_addr; }
36 
FreeDeviceMem()37 void LaunchTransData::FreeDeviceMem() {
38   input_addr_ = nullptr;
39   for (size_t i = 0; i < outputs_addr_.size(); ++i) {
40     if (outputs_addr_[i] != nullptr) {
41       AscendMemoryPool::GetInstance().FreeTensorMem(outputs_addr_[i]);
42       outputs_addr_[i] = nullptr;
43     }
44   }
45   outputs_addr_.clear();
46 }
47 
SetKernelBuildInfo()48 void LaunchTransData::SetKernelBuildInfo() {
49   if (!kernel_graph_->execution_order().empty()) {
50     auto new_op = kernel_graph_->execution_order()[0];
51     std::vector<TypeId> device_type = {dtype_};
52     auto input_format = (src_format_ == kOpFormat_NCHW) ? kOpFormat_DEFAULT : src_format_;
53     auto output_format = (dst_format_ == kOpFormat_NCHW) ? kOpFormat_DEFAULT : dst_format_;
54     std::vector<std::string> inputs_format = {input_format};
55     std::vector<std::string> outputs_format = {output_format};
56     std::vector<kernel::KernelObjectType> input_object_types = {kernel::KernelObjectType::TENSOR};
57     std::vector<kernel::KernelObjectType> output_object_types{kernel::KernelObjectType::TENSOR};
58     // set build info
59     auto builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>();
60     builder->SetKernelType(KernelType::ACL_KERNEL);
61     builder->SetInputsDeviceType(device_type);
62     builder->SetOutputsDeviceType(device_type);
63     builder->SetInputsFormat(inputs_format);
64     builder->SetOutputsFormat(outputs_format);
65     builder->SetInputsKernelObjectType(input_object_types);
66     builder->SetOutputsKernelObjectType(output_object_types);
67     builder->SetInputsReshapeType({});
68     builder->SetOutputsReshapeType({});
69     AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), new_op.get());
70     // set attr
71     bool in_def_flag = IsOneOfDefaultFormat(input_format);
72     bool out_def_flag = IsOneOfDefaultFormat(output_format);
73     common::AnfAlgo::SetNodeAttr(kAttrInputDefaultFormat, MakeValue(in_def_flag), new_op);
74     common::AnfAlgo::SetNodeAttr(kAttrOutputDefaultFormat, MakeValue(out_def_flag), new_op);
75     common::AnfAlgo::SetNodeAttr(kAttrSrcFormat, MakeValue(src_format_), new_op);
76     common::AnfAlgo::SetNodeAttr(kAttrDstFormat, MakeValue(dst_format_), new_op);
77     common::AnfAlgo::SetNodeAttr(kAttrGroups, MakeValue(groups_), new_op);
78     common::AnfAlgo::SetNodeAttr(kAttrFracZGroup, MakeValue(groups_), new_op);
79   }
80 }
81 
ConstructKernelGraph()82 void LaunchTransData::ConstructKernelGraph() {
83   std::vector<TypeId> input_dtypes = {dtype_};
84   std::vector<TypeId> output_dtypes = {dtype_};
85   // obtain input & output shape
86   std::vector<ShapeVector> input_shapes = {{shape_}};
87   std::vector<ShapeVector> output_shapes = {{shape_}};
88   kernel_graph_ = session::SingleKernelGraph::ConstructKernelGraphBasedOnSingleOp(
89     kIdentityOpName, input_dtypes, input_shapes, output_dtypes, output_shapes);
90   MS_EXCEPTION_IF_NULL(kernel_graph_);
91 }
92 
AllocDeviceMem(size_t size)93 uint8_t *LaunchTransData::AllocDeviceMem(size_t size) {
94   auto device_memory = AscendMemoryPool::GetInstance().AllocTensorMem(size, false, stream_id_);
95   if (device_memory == nullptr) {
96     MS_LOG(EXCEPTION) << "Fail to alloc memory, size: " << size << "B.";
97   }
98   return static_cast<uint8_t *>(device_memory);
99 }
100 
CreateOutputAddr(const std::vector<size_t> & outputs_list,std::vector<kernel::KernelTensorPtr> * kernel_tensors)101 void LaunchTransData::CreateOutputAddr(const std::vector<size_t> &outputs_list,
102                                        std::vector<kernel::KernelTensorPtr> *kernel_tensors) {
103   MS_EXCEPTION_IF_NULL(kernel_tensors);
104   // init output_addr_
105   outputs_addr_ = std::vector<uint8_t *>(outputs_list.size(), nullptr);
106   if (outputs_addr_.size() < outputs_list.size()) {
107     MS_LOG_EXCEPTION << "Error addr size!";
108   }
109   kernel_tensors->clear();
110   for (size_t i = 0; i < outputs_list.size(); ++i) {
111     auto size = MemoryManager::GetCommonAlignSize(outputs_list[i]);
112     outputs_addr_[i] = AllocDeviceMem(size);
113     auto kernel_tensor = std::make_shared<kernel::KernelTensor>(
114       outputs_addr_[i], size, kernel::GetFormatFromStrToEnum(dst_format_), dtype_, shape_, kAscendDevice, 0);
115     kernel_tensors->emplace_back(kernel_tensor);
116   }
117 }
118 
AclKernelBuild()119 void LaunchTransData::AclKernelBuild() {
120   auto kernel = kernel_graph_->execution_order()[0];
121   kernel_mod_ = kernel::AclOpBuild(kernel);
122   MS_EXCEPTION_IF_NULL(kernel_mod_);
123   AnfAlgo::SetKernelMod(kernel_mod_, kernel.get());
124 }
125 
// Builds (lazily) the single-op transdata graph, prepares input/output
// KernelTensors, and launches the kernel on this launcher's stream.
// Raises when the ACL launch reports failure.
void LaunchTransData::LaunchOpKernel() {
  // construct graph (only once; reused on subsequent launches)
  if (kernel_graph_ == nullptr) {
    ConstructKernelGraph();
  }
  SetKernelBuildInfo();
  AclKernelBuild();

  // inputs: one tensor wrapping the caller-provided device address in the
  // source format. The shared_ptr keeps the KernelTensor alive for the launch.
  std::vector<kernel::KernelTensor *> kernel_inputs;
  auto input = std::make_shared<kernel::KernelTensor>(
    input_addr_, total_size_, kernel::GetFormatFromStrToEnum(src_format_), dtype_, shape_, kAscendDevice, 0);
  kernel_inputs.push_back(input.get());

  // outputs: device buffers are allocated here and owned via outputs_addr_
  // (released later by FreeDeviceMem); output_tensors keeps the wrappers alive.
  std::vector<kernel::KernelTensor *> kernel_outputs;
  std::vector<kernel::KernelTensorPtr> output_tensors;
  CreateOutputAddr(kernel_mod_->GetOutputSizeList(), &output_tensors);
  (void)std::transform(output_tensors.begin(), output_tensors.end(), std::back_inserter(kernel_outputs),
                       [](kernel::KernelTensorPtr &tensor) { return tensor.get(); });

  // workspaces: none are passed for this kernel
  std::vector<kernel::KernelTensor *> kernel_workspace;
  const auto stream = AscendStreamMng::GetInstance().GetStream(stream_id_);

  // launch
  auto ret_status = kernel_mod_->Launch(kernel_inputs, kernel_workspace, kernel_outputs, stream);
  if (!ret_status) {
    MS_LOG(EXCEPTION) << "Launch transdata single kernel failed";
  }
}
157 }  // namespace mindspore::device::ascend
158