• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2021-2022 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_BATCHOSPACE_KERNEL_H_
18 #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_BATCHOSPACE_KERNEL_H_
19 
20 #include <vector>
21 #include <string>
22 #include <memory>
23 #include <map>
24 #include "plugin/device/gpu/kernel/gpu_kernel.h"
25 #include "plugin/device/gpu/kernel/gpu_kernel_factory.h"
26 #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/batchtospace_impl.cuh"
27 
28 namespace mindspore {
29 namespace kernel {
30 constexpr size_t SHAPE_SIZE = 4;
31 constexpr size_t CROPS_SHAPE_0 = 2;
32 constexpr size_t CROPS_SHAPE_1 = 2;
33 template <typename T>
34 class BatchToSpaceGpuKernelMod : public NativeGpuKernelMod {
35  public:
BatchToSpaceGpuKernelMod()36   BatchToSpaceGpuKernelMod() {
37     in_ = 0;
38     ic_ = 0;
39     ih_ = 0;
40     iw_ = 0;
41     on_ = 0;
42     oc_ = 0;
43     oh_ = 0;
44     ow_ = 0;
45     kernel_name_ = "BatchToSpace";
46     crops_.clear();
47     output_size_list_.clear();
48     input_shape_.clear();
49   }
50   ~BatchToSpaceGpuKernelMod() = default;
GetOutputSizeList()51   const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
GetWorkspaceSizeList()52   const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
53 
Launch(const std::vector<KernelTensor * > & inputs,const std::vector<KernelTensor * > & workspace,const std::vector<KernelTensor * > & outputs,void * stream_ptr)54   bool Launch(const std::vector<KernelTensor *> &inputs, const std::vector<KernelTensor *> &workspace,
55               const std::vector<KernelTensor *> &outputs, void *stream_ptr) override {
56     T *input = GetDeviceAddress<T>(inputs, 0);
57     T *output = GetDeviceAddress<T>(outputs, 0);
58 
59     size_t size = output_size_list_[0] / sizeof(T);
60 
61     auto status =
62       CalBatchToSpace<T>(size, input, in_, ih_, iw_, ic_, on_, oh_, ow_, oc_, crops_[0][0], crops_[0][1], crops_[1][0],
63                          crops_[1][1], block_size_, output, device_id_, reinterpret_cast<cudaStream_t>(stream_ptr));
64     CHECK_CUDA_STATUS(status, kernel_name_);
65     return true;
66   }
67 
Init(const std::vector<KernelTensor * > & inputs,const std::vector<KernelTensor * > & outputs)68   bool Init(const std::vector<KernelTensor *> &inputs, const std::vector<KernelTensor *> &outputs) override {
69     device_id_ = MsContext::GetInstance()->get_param<uint32_t>(MS_CTX_DEVICE_ID);
70     // wait for primitive unified between lite and cloud.
71     block_size_ = static_cast<size_t>(GetValue<int64_t>(primitive_->GetAttr("block_size")));
72     if (block_size_ < 1) {
73       MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the 'block_size' cannot be less than 1, but got "
74                         << block_size_;
75     }
76     // check crops
77     crops_ = GetValue<std::vector<std::vector<int64_t>>>(primitive_->GetAttr("crops"));
78     if (crops_.size() != CROPS_SHAPE_0) {
79       MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the size of 'crops' must be " << CROPS_SHAPE_0 << ", but got "
80                         << crops_.size();
81     }
82     if (crops_[0].size() != CROPS_SHAPE_1 || crops_[1].size() != CROPS_SHAPE_1) {
83       MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the size of element of 'crops' must be " << CROPS_SHAPE_1
84                         << ", but got the size of crops[0]: " << crops_[0].size()
85                         << ", the size of crops[1]: " << crops_[1].size();
86     }
87     CHECK_KERNEL_INPUTS_NUM(inputs.size(), 1, kernel_name_);
88     CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), 1, kernel_name_);
89     return true;
90   }
91 
Resize(const std::vector<KernelTensor * > & inputs,const std::vector<KernelTensor * > & outputs)92   int Resize(const std::vector<KernelTensor *> &inputs, const std::vector<KernelTensor *> &outputs) override {
93     if (int ret = KernelMod::Resize(inputs, outputs); ret != KRET_OK) {
94       return ret;
95     }
96     // check input_shape
97     auto input_shape = inputs[0]->GetShapeVector();
98     if (input_shape.size() != SHAPE_SIZE) {
99       MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input must be 4, but got "
100                         << input_shape.size();
101     }
102     if ((input_shape[0] % (block_size_ * block_size_)) != 0) {
103       MS_LOG(EXCEPTION) << "For '" << kernel_name_
104                         << "', input_shape[0] must be divisible by product of block_shape, but got input_shape[0]: "
105                         << input_shape[0] << ", block_shape: " << block_size_;
106     }
107     for (size_t idx = 0; idx < SHAPE_SIZE; ++idx) {
108       if (input_shape[idx] < 1) {
109         MS_LOG(EXCEPTION) << "For '" << kernel_name_
110                           << "', the element of shape of input cannot be less than 1, but got " << input_shape;
111       }
112     }
113     input_shape_.assign(input_shape.begin(), input_shape.end());
114     for (size_t idx_i = 0; idx_i < CROPS_SHAPE_0; ++idx_i) {
115       for (size_t idx_j = 0; idx_j < CROPS_SHAPE_1; ++idx_j) {
116         if (crops_[idx_i][idx_j] < 0) {
117           MS_LOG(EXCEPTION) << "For '" << kernel_name_
118                             << "', the element of 'crops' must be greater than or equal to 0, but got crops[" << idx_i
119                             << "][" << idx_j << "]: " << crops_[idx_i][idx_j];
120         }
121       }
122       auto tmp_shape = input_shape[idx_i + CROPS_SHAPE_1] * block_size_ - crops_[idx_i][0] - crops_[idx_i][1];
123       if (tmp_shape <= 0) {
124         MS_LOG(EXCEPTION) << "For '" << kernel_name_
125                           << "', the element of shape of output must be greater than 0, but got " << tmp_shape;
126       }
127     }
128     constexpr int IDX_2 = 2;
129     constexpr int IDX_3 = 3;
130     in_ = static_cast<size_t>(input_shape_[0]);
131     ic_ = static_cast<size_t>(input_shape_[1]);
132     ih_ = static_cast<size_t>(input_shape_[IDX_2]);
133     iw_ = static_cast<size_t>(input_shape_[IDX_3]);
134 
135     on_ = in_ / (block_size_ * block_size_);
136     oc_ = ic_;
137     oh_ = ih_ * block_size_ - crops_[0][0] - crops_[0][1];
138     ow_ = iw_ * block_size_ - crops_[1][0] - crops_[1][1];
139     return static_cast<int>(KRET_OK);
140   }
141 
142  private:
143   std::vector<std::vector<int64_t>> crops_;
144   std::vector<int64_t> input_shape_;
145   size_t block_size_;
146   size_t in_;
147   size_t ic_;
148   size_t ih_;
149   size_t iw_;
150   size_t on_;
151   size_t oc_;
152   size_t oh_;
153   size_t ow_;
154 };
155 }  // namespace kernel
156 }  // namespace mindspore
157 #endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_BATCHOSPACE_KERNEL_H_
158