/**
 * Copyright 2019-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "backend/kernel_compiler/gpu/data/dataset_iterator_kernel.h"

#include <cuda_runtime_api.h>
#include <memory>
#include <string>
#include <vector>
#include <algorithm>
#include "backend/kernel_compiler/gpu/data/dataset_utils.h"
#include "backend/kernel_compiler/common_utils.h"
#ifndef ENABLE_SECURITY
#include "profiler/device/gpu/gpu_profiling.h"
#endif
#include "runtime/device/gpu/gpu_buffer_mgr.h"
#include "runtime/device/gpu/gpu_common.h"
#ifdef ENABLE_DUMP_IR
#include "debug/rdr/running_data_recorder.h"
#endif

namespace mindspore {
namespace kernel {
using mindspore::device::GpuBufferMgr;
using mindspore::device::HandleMgr;

DatasetIteratorKernel::DatasetIteratorKernel()
    : handle_(HandleMgr::INVALID_HANDLE), total_bytes_(0), profiling_enable_(false), profiling_op_(nullptr) {}

DatasetIteratorKernel::~DatasetIteratorKernel() { GpuBufferMgr::GetInstance().Close(handle_); }

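// Closes the data queue and marks the handle invalid; Launch() reopens the queue on demand.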
void DatasetIteratorKernel::ReleaseResource() {
  GpuBufferMgr::GetInstance().Close(handle_);
  handle_ = HandleMgr::INVALID_HANDLE;
}

const std::vector<size_t> &DatasetIteratorKernel::GetInputSizeList() const { return input_size_list_; }

const std::vector<size_t> &DatasetIteratorKernel::GetOutputSizeList() const { return output_size_list_; }

const std::vector<size_t> &DatasetIteratorKernel::GetWorkspaceSizeList() const { return workspace_size_list_; }

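// Reads the output shapes and types from the kernel node, computes the byte size of each output,
// and opens the GPU data queue identified by the node's "shared_name" attribute.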
bool DatasetIteratorKernel::Init(const CNodePtr &kernel_node) {
  MS_EXCEPTION_IF_NULL(kernel_node);
  kernel_node_ = kernel_node;
  queue_name_ = GetAttr<std::string>(kernel_node, "shared_name");
  std::vector<std::vector<int>> shapes;
  std::vector<TypePtr> types;
  GetShapeAndType(kernel_node, &shapes, &types);
  for (auto item : types) {
    MS_EXCEPTION_IF_NULL(item);
  }
  for (size_t i = 0; i < shapes.size(); i++) {
    int unit = UnitSizeInBytes(types[i]->type_id());
    int nums = ElementNums(shapes[i]);
    int bytes = unit * nums;
    output_size_list_.push_back(bytes);
    total_bytes_ += bytes;
  }

  handle_ = GpuBufferMgr::GetInstance().Open(0, queue_name_, output_size_list_);
  if (handle_ == HandleMgr::INVALID_HANDLE) {
    MS_LOG(EXCEPTION) << "Gpu Queue(" << queue_name_ << ") Open Failed";
  }

#ifndef ENABLE_SECURITY
  auto profiler_inst = profiler::gpu::GPUProfiler::GetInstance();
  MS_EXCEPTION_IF_NULL(profiler_inst);
  profiling_enable_ = profiler_inst->GetEnableFlag();
  if (profiling_enable_) {
    std::string path = profiler_inst->ProfileDataPath();
    profiling_op_ = std::make_shared<GetNextProfiling>(path);
    MS_EXCEPTION_IF_NULL(profiling_op_);
    profiler_inst->RegisterProfilingOp(profiling_op_);
  }
#endif
  return true;
}

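// Output sizes are already filled in Init(), so there is nothing left to initialize here.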
void DatasetIteratorKernel::InitSizeLists() { return; }

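// Blocks until a batch is available at the front of the GPU data queue. Retries on timeout up to
// 10 times before raising an exception, and records queue size and wait time when profiling is enabled.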
bool DatasetIteratorKernel::ReadDevice(void **addr, size_t *len) {
  uint64_t start_time_stamp = 0;
  uint32_t queue_size = 0;

  int repeat = 0;
  while (true) {
    if (profiling_enable_) {
      start_time_stamp = profiling_op_->GetTimeStamp();
      queue_size = GpuBufferMgr::GetInstance().Size(handle_);
    }
    auto ret = GpuBufferMgr::GetInstance().Front(handle_, addr, len);
    if (ret == device::SUCCESS) {
      if (profiling_enable_) {
        uint64_t end_time_stamp = profiling_op_->GetTimeStamp();
        profiling_op_->RecordData(queue_size, start_time_stamp, end_time_stamp);
      }
      break;
    }

    if (ret == device::TIMEOUT) {
      repeat++;
      if (repeat < 10) {
        MS_LOG(INFO) << "Waiting for data...(" << repeat << " / 10)";
        continue;
      } else {
#ifdef ENABLE_DUMP_IR
        mindspore::RDR::TriggerAll();
#endif
        MS_LOG(EXCEPTION) << "Get data timeout";
      }
    }

    if (profiling_enable_) {
      uint64_t end_time_stamp = profiling_op_->GetTimeStamp();
      profiling_op_->RecordData(queue_size, start_time_stamp, end_time_stamp);
    }
    MS_LOG(ERROR) << "Get data failed, errcode " << ret;
    return false;
  }
  return true;
}

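// Reopens the queue if the handle was released, copies the data at the queue front into the kernel
// outputs on the given CUDA stream, then pops the consumed entry.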
bool DatasetIteratorKernel::Launch(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
                                   const std::vector<AddressPtr> &outputs, void *stream) {
  if (handle_ == HandleMgr::INVALID_HANDLE) {
    handle_ = GpuBufferMgr::GetInstance().Open(0, queue_name_, output_size_list_);
    if (handle_ == HandleMgr::INVALID_HANDLE) {
      MS_LOG(EXCEPTION) << "Gpu Queue(" << queue_name_ << ") Open Failed";
    }
  }

  void *addr = nullptr;
  size_t len = 0;
  if (!ReadDevice(&addr, &len)) {
    return false;
  }
  if (total_bytes_ != len) {
    MS_LOG(ERROR) << "Dataset front error. read: " << len << ", expect: " << total_bytes_;
    return false;
  }

  for (size_t i = 0; i < output_size_list_.size(); i++) {
    void *output_addr = GetDeviceAddress<void>(outputs, i);
    CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
                               cudaMemcpyAsync(output_addr, addr, output_size_list_[i], cudaMemcpyDeviceToDevice,
                                               reinterpret_cast<cudaStream_t>(stream)),
                               "Cuda Memcpy Failed");
    addr = reinterpret_cast<unsigned char *>(addr) + output_size_list_[i];
  }

  CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(stream)),
                             "cudaStreamSynchronize failed");
  (void)GpuBufferMgr::GetInstance().Pop(handle_);
  return true;
}
}  // namespace kernel
}  // namespace mindspore