/**
 * Copyright 2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_OPS_SOFTMAX_IMPL_CUH_
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_OPS_SOFTMAX_IMPL_CUH_
#include <cuda_fp16.h>
#include <cmath>  // std::exp / std::log used by the epilogue below
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"

// Maps an element type to the type used for intermediate accumulation;
// on CUDA, half, float, and double inputs all accumulate in float.
template <typename T, bool is_cuda>
struct AccumulateType {};
template <>
struct AccumulateType<half, true> {
  using type = float;
};
template <>
struct AccumulateType<float, true> {
  using type = float;
};
template <>
struct AccumulateType<double, true> {
  using type = float;
};

template <typename T, bool is_cuda>
using acc_type = typename AccumulateType<T, is_cuda>::type;

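// Illustrative sketch, not part of the original header: the trait collapses
// every supported element type to float accumulation on CUDA, so (with
// <type_traits> available) the following would hold:
//   static_assert(std::is_same<acc_type<half, true>, float>::value, "");
//   static_assert(std::is_same<acc_type<double, true>, float>::value, "");
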
// Epilogue applied per element once the row max and the exp-sum have been
// reduced: softmax yields exp(x - max) / sum; log-softmax stores log(sum)
// in the constructor and yields x - max - log(sum).
template <typename T, typename AccumT, typename OutT, bool is_log_softmax>
struct SoftMaxForwardEpilogue {
  __device__ __forceinline__ SoftMaxForwardEpilogue(AccumT max_input, AccumT sum)
      : max_input(max_input), sum(is_log_softmax ? std::log(sum) : sum) {}
  __device__ __forceinline__ OutT operator()(T input) const {
    return is_log_softmax ? static_cast<OutT>((AccumT)input - max_input - sum)
                          : static_cast<OutT>(std::exp((AccumT)input - max_input) / sum);
  }
  const AccumT max_input;
  const AccumT sum;
};

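// Usage sketch (hypothetical names, illustrative only): a kernel first
// reduces a row to row_max and to row_sum = sum(exp(x - row_max)), then maps
// each element through the epilogue:
//   SoftMaxForwardEpilogue<float, float, float, false> epilogue(row_max, row_sum);
//   output[i] = epilogue(input[i]);  // exp(input[i] - row_max) / row_sum
// With is_log_softmax = true the constructor stores log(row_sum) and the
// call returns input[i] - row_max - log(row_sum).
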
// Aligned vector type that generates vectorized (128-bit) loads/stores on
// CUDA; the alignas expression reduces to sizeof(float4), i.e. 16 bytes.
template <typename scalar_t>
struct alignas(sizeof(scalar_t) * sizeof(float4) / sizeof(scalar_t)) aligned_vector {
  scalar_t val[sizeof(float4) / sizeof(scalar_t)];
};

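// Access sketch (illustrative; assumes `src` is 16-byte aligned): viewing
// the raw pointer as aligned_vector lets the compiler emit one 128-bit load
// for sizeof(float4) / sizeof(scalar_t) consecutive elements:
//   using LoadT = aligned_vector<float>;               // 4 floats per load
//   LoadT chunk = reinterpret_cast<const LoadT *>(src)[i];
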
// Launcher for the forward softmax / log-softmax kernels; the reduction runs
// over the dim_size_ axis of an [outer_size_, dim_size_, inner_size_] view.
template <typename T, bool is_log_softmax>
cudaError_t Softmax(T *input_, T *output_, size_t dim_size_, size_t outer_size_, size_t inner_size_, size_t device_id,
                    cudaStream_t cuda_stream);

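// Host-side call sketch (hypothetical buffers and sizes): for a contiguous
// [rows, cols] matrix softmaxed along its last axis, inner_size_ is 1:
//   cudaError_t err =
//       Softmax<float, false>(d_input, d_output, /*dim_size_=*/cols,
//                             /*outer_size_=*/rows, /*inner_size_=*/1,
//                             /*device_id=*/0, stream);
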
#endif  // MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_OPS_SOFTMAX_IMPL_CUH_