/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "convert_gradient_impl.cuh"

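// Gathers a row-major matrix of `width` columns (input_addr) into a tile-contiguous layout (output_addr):
// each height_h x height_w tile is stored contiguously, with tiles ordered row-major and `batchwidth`
// tiles per tile-row (so `width` is expected to equal batchwidth * height_w).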
template <typename T>
__global__ void ConvertGradientKernel(const size_t size, const size_t height_h, const size_t height_w,
                                      const size_t batchwidth, const size_t width, T *input_addr, T *output_addr) {
  for (size_t pointIdx = blockIdx.x * blockDim.x + threadIdx.x; pointIdx < (size); pointIdx += blockDim.x * gridDim.x) {
    size_t dst_batchIdx = pointIdx / (height_h * height_w);
    size_t dst_batchIdxX = dst_batchIdx / batchwidth;
    size_t dst_batchIdxY = dst_batchIdx % batchwidth;
    size_t dst_x = (pointIdx - dst_batchIdx * height_h * height_w) / height_w;
    size_t dst_y = (pointIdx - dst_batchIdx * height_h * height_w) % height_w;
    size_t src_coordinate = dst_batchIdxX * height_h * width + dst_x * width + dst_batchIdxY * height_w + dst_y;
    output_addr[pointIdx] = input_addr[src_coordinate];
  }
}

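// Inverse of ConvertGradientKernel: scatters the tile-contiguous layout (input_addr) back into a
// row-major matrix of `width` columns (output_addr).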
template <typename T>
__global__ void ConvertGradientBackKernel(const size_t size, const size_t height_h, const size_t height_w,
                                          const size_t batchwidth, const size_t width, T *input_addr, T *output_addr) {
  for (size_t pointIdx = blockIdx.x * blockDim.x + threadIdx.x; pointIdx < (size); pointIdx += blockDim.x * gridDim.x) {
    size_t dst_batchIdx = pointIdx / (height_h * height_w);
    size_t dst_batchIdxX = dst_batchIdx / batchwidth;
    size_t dst_batchIdxY = dst_batchIdx % batchwidth;
    size_t dst_x = (pointIdx - dst_batchIdx * height_h * height_w) / height_w;
    size_t dst_y = (pointIdx - dst_batchIdx * height_h * height_w) % height_w;
    size_t src_coordinate = dst_batchIdxX * height_h * width + dst_x * width + dst_batchIdxY * height_w + dst_y;
    output_addr[src_coordinate] = input_addr[pointIdx];
  }
}

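// Scatter-back overload with bounds checking: the tiled layout may cover a matrix larger than the
// original ori_h x ori_w (e.g. when it is padded), so elements that land outside the original bounds
// are dropped; output_addr is row-major with ori_w columns.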
template <typename T>
__global__ void ConvertGradientBackKernel(const size_t size, const size_t height_h, const size_t height_w,
                                          const size_t ori_h, const size_t ori_w, const size_t batchwidth,
                                          const size_t width, T *input_addr, T *output_addr) {
  for (size_t pointIdx = blockIdx.x * blockDim.x + threadIdx.x; pointIdx < (size); pointIdx += blockDim.x * gridDim.x) {
    size_t dst_batchIdx = pointIdx / (height_h * height_w);
    size_t dst_batchIdxX = dst_batchIdx / batchwidth;
    size_t dst_batchIdxY = dst_batchIdx % batchwidth;
    size_t dst_x = (pointIdx - dst_batchIdx * height_h * height_w) / height_w;
    size_t dst_y = (pointIdx - dst_batchIdx * height_h * height_w) % height_w;
    size_t src_x = dst_batchIdxX * height_h + dst_x;
    size_t src_y = dst_batchIdxY * height_w + dst_y;
    if (src_x < ori_h && src_y < ori_w) {
      size_t src_coordinate = src_x * ori_w + src_y;
      output_addr[src_coordinate] = input_addr[pointIdx];
    }
  }
}

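// Launches ConvertGradientKernel over `size` elements on the given stream and returns the CUDA status.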
template <typename T>
cudaError_t ConvertGradient(const size_t size, const size_t height_h, const size_t height_w, const size_t batchwidth,
                            const size_t width, T *input_addr, T *output_addr, cudaStream_t cuda_stream) {
  ConvertGradientKernel<<<GET_BLOCKS(size), GET_THREADS, 0, cuda_stream>>>(size, height_h, height_w, batchwidth, width,
                                                                           input_addr, output_addr);
  return GetCudaStatus();
}

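// Launches the scatter-back kernel over `size` elements on the given stream and returns the CUDA status.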
template <typename T>
cudaError_t ConvertGradientBack(const size_t size, const size_t height_h, const size_t height_w,
                                const size_t batchwidth, const size_t width, T *input_addr, T *output_addr,
                                cudaStream_t cuda_stream) {
  ConvertGradientBackKernel<<<GET_BLOCKS(size), GET_THREADS, 0, cuda_stream>>>(size, height_h, height_w, batchwidth,
                                                                               width, input_addr, output_addr);
  return GetCudaStatus();
}

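// Launches the bounds-checked scatter-back overload (for layouts larger than ori_h x ori_w) and
// returns the CUDA status.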
template <typename T>
cudaError_t ConvertGradientBack(const size_t size, const size_t height_h, const size_t height_w, const size_t ori_h,
                                const size_t ori_w, const size_t batchwidth, const size_t width, T *input_addr,
                                T *output_addr, cudaStream_t cuda_stream) {
  ConvertGradientBackKernel<<<GET_BLOCKS(size), GET_THREADS, 0, cuda_stream>>>(
    size, height_h, height_w, ori_h, ori_w, batchwidth, width, input_addr, output_addr);
  return GetCudaStatus();
}

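// Explicit template instantiations for float.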
template CUDA_LIB_EXPORT cudaError_t ConvertGradient<float>(const size_t size, const size_t height_h,
                                                            const size_t height_w, const size_t batchwidth,
                                                            const size_t width, float *input_addr, float *output_addr,
                                                            cudaStream_t cuda_stream);

template CUDA_LIB_EXPORT cudaError_t ConvertGradientBack<float>(const size_t size, const size_t height_h,
                                                                const size_t height_w, const size_t batchwidth,
                                                                const size_t width, float *input_addr,
                                                                float *output_addr, cudaStream_t cuda_stream);

template CUDA_LIB_EXPORT cudaError_t ConvertGradientBack<float>(const size_t size, const size_t height_h,
                                                                const size_t height_w, const size_t ori_h,
                                                                const size_t ori_w, const size_t batchwidth,
                                                                const size_t width, float *input_addr,
                                                                float *output_addr, cudaStream_t cuda_stream);