1 /** 2 * Copyright 2021-2022 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_BATCHOSPACE_KERNEL_H_ 18 #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_BATCHOSPACE_KERNEL_H_ 19 20 #include <vector> 21 #include <string> 22 #include <memory> 23 #include <map> 24 #include "plugin/device/gpu/kernel/gpu_kernel.h" 25 #include "plugin/device/gpu/kernel/gpu_kernel_factory.h" 26 #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/batchtospace_impl.cuh" 27 28 namespace mindspore { 29 namespace kernel { 30 constexpr size_t SHAPE_SIZE = 4; 31 constexpr size_t CROPS_SHAPE_0 = 2; 32 constexpr size_t CROPS_SHAPE_1 = 2; 33 template <typename T> 34 class BatchToSpaceGpuKernelMod : public NativeGpuKernelMod { 35 public: BatchToSpaceGpuKernelMod()36 BatchToSpaceGpuKernelMod() { 37 in_ = 0; 38 ic_ = 0; 39 ih_ = 0; 40 iw_ = 0; 41 on_ = 0; 42 oc_ = 0; 43 oh_ = 0; 44 ow_ = 0; 45 kernel_name_ = "BatchToSpace"; 46 crops_.clear(); 47 output_size_list_.clear(); 48 input_shape_.clear(); 49 } 50 ~BatchToSpaceGpuKernelMod() = default; GetOutputSizeList()51 const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; } GetWorkspaceSizeList()52 const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; } 53 Launch(const std::vector<KernelTensor * > & inputs,const std::vector<KernelTensor * > & workspace,const std::vector<KernelTensor * > & outputs,void * stream_ptr)54 bool Launch(const std::vector<KernelTensor *> &inputs, const std::vector<KernelTensor *> &workspace, 55 const std::vector<KernelTensor *> &outputs, void *stream_ptr) override { 56 T *input = GetDeviceAddress<T>(inputs, 0); 57 T *output = GetDeviceAddress<T>(outputs, 0); 58 59 size_t size = output_size_list_[0] / sizeof(T); 60 61 auto status = 62 CalBatchToSpace<T>(size, input, in_, ih_, iw_, ic_, on_, oh_, ow_, oc_, crops_[0][0], crops_[0][1], crops_[1][0], 63 crops_[1][1], block_size_, output, device_id_, reinterpret_cast<cudaStream_t>(stream_ptr)); 64 CHECK_CUDA_STATUS(status, kernel_name_); 65 return true; 66 } 67 Init(const std::vector<KernelTensor * > & inputs,const std::vector<KernelTensor * > & outputs)68 bool Init(const std::vector<KernelTensor *> &inputs, const std::vector<KernelTensor *> &outputs) override { 69 device_id_ = MsContext::GetInstance()->get_param<uint32_t>(MS_CTX_DEVICE_ID); 70 // wait for primitive unified between lite and cloud. 71 block_size_ = static_cast<size_t>(GetValue<int64_t>(primitive_->GetAttr("block_size"))); 72 if (block_size_ < 1) { 73 MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the 'block_size' cannot be less than 1, but got " 74 << block_size_; 75 } 76 // check crops 77 crops_ = GetValue<std::vector<std::vector<int64_t>>>(primitive_->GetAttr("crops")); 78 if (crops_.size() != CROPS_SHAPE_0) { 79 MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the size of 'crops' must be " << CROPS_SHAPE_0 << ", but got " 80 << crops_.size(); 81 } 82 if (crops_[0].size() != CROPS_SHAPE_1 || crops_[1].size() != CROPS_SHAPE_1) { 83 MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the size of element of 'crops' must be " << CROPS_SHAPE_1 84 << ", but got the size of crops[0]: " << crops_[0].size() 85 << ", the size of crops[1]: " << crops_[1].size(); 86 } 87 CHECK_KERNEL_INPUTS_NUM(inputs.size(), 1, kernel_name_); 88 CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), 1, kernel_name_); 89 return true; 90 } 91 Resize(const std::vector<KernelTensor * > & inputs,const std::vector<KernelTensor * > & outputs)92 int Resize(const std::vector<KernelTensor *> &inputs, const std::vector<KernelTensor *> &outputs) override { 93 if (int ret = KernelMod::Resize(inputs, outputs); ret != KRET_OK) { 94 return ret; 95 } 96 // check input_shape 97 auto input_shape = inputs[0]->GetShapeVector(); 98 if (input_shape.size() != SHAPE_SIZE) { 99 MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input must be 4, but got " 100 << input_shape.size(); 101 } 102 if ((input_shape[0] % (block_size_ * block_size_)) != 0) { 103 MS_LOG(EXCEPTION) << "For '" << kernel_name_ 104 << "', input_shape[0] must be divisible by product of block_shape, but got input_shape[0]: " 105 << input_shape[0] << ", block_shape: " << block_size_; 106 } 107 for (size_t idx = 0; idx < SHAPE_SIZE; ++idx) { 108 if (input_shape[idx] < 1) { 109 MS_LOG(EXCEPTION) << "For '" << kernel_name_ 110 << "', the element of shape of input cannot be less than 1, but got " << input_shape; 111 } 112 } 113 input_shape_.assign(input_shape.begin(), input_shape.end()); 114 for (size_t idx_i = 0; idx_i < CROPS_SHAPE_0; ++idx_i) { 115 for (size_t idx_j = 0; idx_j < CROPS_SHAPE_1; ++idx_j) { 116 if (crops_[idx_i][idx_j] < 0) { 117 MS_LOG(EXCEPTION) << "For '" << kernel_name_ 118 << "', the element of 'crops' must be greater than or equal to 0, but got crops[" << idx_i 119 << "][" << idx_j << "]: " << crops_[idx_i][idx_j]; 120 } 121 } 122 auto tmp_shape = input_shape[idx_i + CROPS_SHAPE_1] * block_size_ - crops_[idx_i][0] - crops_[idx_i][1]; 123 if (tmp_shape <= 0) { 124 MS_LOG(EXCEPTION) << "For '" << kernel_name_ 125 << "', the element of shape of output must be greater than 0, but got " << tmp_shape; 126 } 127 } 128 constexpr int IDX_2 = 2; 129 constexpr int IDX_3 = 3; 130 in_ = static_cast<size_t>(input_shape_[0]); 131 ic_ = static_cast<size_t>(input_shape_[1]); 132 ih_ = static_cast<size_t>(input_shape_[IDX_2]); 133 iw_ = static_cast<size_t>(input_shape_[IDX_3]); 134 135 on_ = in_ / (block_size_ * block_size_); 136 oc_ = ic_; 137 oh_ = ih_ * block_size_ - crops_[0][0] - crops_[0][1]; 138 ow_ = iw_ * block_size_ - crops_[1][0] - crops_[1][1]; 139 return static_cast<int>(KRET_OK); 140 } 141 142 private: 143 std::vector<std::vector<int64_t>> crops_; 144 std::vector<int64_t> input_shape_; 145 size_t block_size_; 146 size_t in_; 147 size_t ic_; 148 size_t ih_; 149 size_t iw_; 150 size_t on_; 151 size_t oc_; 152 size_t oh_; 153 size_t ow_; 154 }; 155 } // namespace kernel 156 } // namespace mindspore 157 #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_BATCHOSPACE_KERNEL_H_ 158