• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2019-2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "backend/kernel_compiler/gpu/data/dataset_iterator_kernel.h"
17 
18 #include <cuda_runtime_api.h>
19 #include <memory>
20 #include <string>
21 #include <vector>
22 #include <algorithm>
23 #include "backend/kernel_compiler/gpu/data/dataset_utils.h"
24 #include "backend/kernel_compiler/common_utils.h"
25 #ifndef ENABLE_SECURITY
26 #include "profiler/device/gpu/gpu_profiling.h"
27 #endif
28 #include "runtime/device/gpu/gpu_buffer_mgr.h"
29 #include "runtime/device/gpu/gpu_common.h"
30 #ifdef ENABLE_DUMP_IR
31 #include "debug/rdr/running_data_recorder.h"
32 #endif
33 
34 namespace mindspore {
35 namespace kernel {
36 using mindspore::device::GpuBufferMgr;
37 using mindspore::device::HandleMgr;
38 
DatasetIteratorKernel()39 DatasetIteratorKernel::DatasetIteratorKernel()
40     : handle_(HandleMgr::INVALID_HANDLE), total_bytes_(0), profiling_enable_(false), profiling_op_(nullptr) {}
41 
~DatasetIteratorKernel()42 DatasetIteratorKernel::~DatasetIteratorKernel() { GpuBufferMgr::GetInstance().Close(handle_); }
43 
ReleaseResource()44 void DatasetIteratorKernel::ReleaseResource() {
45   GpuBufferMgr::GetInstance().Close(handle_);
46   handle_ = HandleMgr::INVALID_HANDLE;
47 }
48 
GetInputSizeList() const49 const std::vector<size_t> &DatasetIteratorKernel::GetInputSizeList() const { return input_size_list_; }
50 
GetOutputSizeList() const51 const std::vector<size_t> &DatasetIteratorKernel::GetOutputSizeList() const { return output_size_list_; }
52 
GetWorkspaceSizeList() const53 const std::vector<size_t> &DatasetIteratorKernel::GetWorkspaceSizeList() const { return workspace_size_list_; }
54 
Init(const CNodePtr & kernel_node)55 bool DatasetIteratorKernel::Init(const CNodePtr &kernel_node) {
56   MS_EXCEPTION_IF_NULL(kernel_node);
57   kernel_node_ = kernel_node;
58   queue_name_ = GetAttr<std::string>(kernel_node, "shared_name");
59   std::vector<std::vector<int>> shapes;
60   std::vector<TypePtr> types;
61   GetShapeAndType(kernel_node, &shapes, &types);
62   for (auto item : types) {
63     MS_EXCEPTION_IF_NULL(item);
64   }
65   for (size_t i = 0; i < shapes.size(); i++) {
66     int unit = UnitSizeInBytes(types[i]->type_id());
67     int nums = ElementNums(shapes[i]);
68     int bytes = unit * nums;
69     output_size_list_.push_back(bytes);
70     total_bytes_ += bytes;
71   }
72 
73   handle_ = GpuBufferMgr::GetInstance().Open(0, queue_name_, output_size_list_);
74   if (handle_ == HandleMgr::INVALID_HANDLE) {
75     MS_LOG(EXCEPTION) << "Gpu Queue(" << queue_name_ << ") Open Failed";
76   }
77 
78 #ifndef ENABLE_SECURITY
79   auto profiler_inst = profiler::gpu::GPUProfiler::GetInstance();
80   MS_EXCEPTION_IF_NULL(profiler_inst);
81   profiling_enable_ = profiler_inst->GetEnableFlag();
82   if (profiling_enable_) {
83     std::string path = profiler_inst->ProfileDataPath();
84     profiling_op_ = std::make_shared<GetNextProfiling>(path);
85     MS_EXCEPTION_IF_NULL(profiling_op_);
86     profiler_inst->RegisterProfilingOp(profiling_op_);
87   }
88 #endif
89   return true;
90 }
91 
InitSizeLists()92 void DatasetIteratorKernel::InitSizeLists() { return; }
93 
ReadDevice(void ** addr,size_t * len)94 bool DatasetIteratorKernel::ReadDevice(void **addr, size_t *len) {
95   uint64_t start_time_stamp = 0;
96   uint32_t queue_size = 0;
97 
98   int repeat = 0;
99   while (true) {
100     if (profiling_enable_) {
101       start_time_stamp = profiling_op_->GetTimeStamp();
102       queue_size = GpuBufferMgr::GetInstance().Size(handle_);
103     }
104     auto ret = GpuBufferMgr::GetInstance().Front(handle_, addr, len);
105     if (ret == device::SUCCESS) {
106       if (profiling_enable_) {
107         uint64_t end_time_stamp = profiling_op_->GetTimeStamp();
108         profiling_op_->RecordData(queue_size, start_time_stamp, end_time_stamp);
109       }
110       break;
111     }
112 
113     if (ret == device::TIMEOUT) {
114       repeat++;
115       if (repeat < 10) {
116         MS_LOG(INFO) << "Waiting for data...(" << repeat << " / 10)";
117         continue;
118       } else {
119 #ifdef ENABLE_DUMP_IR
120         mindspore::RDR::TriggerAll();
121 #endif
122         MS_LOG(EXCEPTION) << "Get data timeout";
123       }
124     }
125 
126     if (profiling_enable_) {
127       uint64_t end_time_stamp = profiling_op_->GetTimeStamp();
128       profiling_op_->RecordData(queue_size, start_time_stamp, end_time_stamp);
129     }
130     MS_LOG(ERROR) << "Get data failed, errcode " << ret;
131     return false;
132   }
133   return true;
134 }
135 
Launch(const std::vector<AddressPtr> &,const std::vector<AddressPtr> &,const std::vector<AddressPtr> & outputs,void * stream)136 bool DatasetIteratorKernel::Launch(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
137                                    const std::vector<AddressPtr> &outputs, void *stream) {
138   if (handle_ == HandleMgr::INVALID_HANDLE) {
139     handle_ = GpuBufferMgr::GetInstance().Open(0, queue_name_, output_size_list_);
140     if (handle_ == HandleMgr::INVALID_HANDLE) {
141       MS_LOG(EXCEPTION) << "Gpu Queue(" << queue_name_ << ") Open Failed";
142     }
143   }
144 
145   void *addr = nullptr;
146   size_t len = 0;
147   if (!ReadDevice(&addr, &len)) {
148     return false;
149   }
150   if (total_bytes_ != len) {
151     MS_LOG(ERROR) << "Dataset front error. read: " << len << ", expect: " << total_bytes_ << ", ";
152     return false;
153   }
154 
155   for (size_t i = 0; i < output_size_list_.size(); i++) {
156     void *output_addr = GetDeviceAddress<void>(outputs, i);
157     CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
158                                cudaMemcpyAsync(output_addr, addr, output_size_list_[i], cudaMemcpyDeviceToDevice,
159                                                reinterpret_cast<cudaStream_t>(stream)),
160                                "Cuda Memcpy Failed");
161     addr = reinterpret_cast<unsigned char *>(addr) + output_size_list_[i];
162   }
163 
164   CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(stream)),
165                              "cudaStreamSynchronize failed");
166   (void)GpuBufferMgr::GetInstance().Pop(handle_);
167   return true;
168 }
169 }  // namespace kernel
170 }  // namespace mindspore
171