• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/**
 * Copyright 2020-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_ARGMAXANDMINWITHVALUE_GPU_KERNEL_H_
18 #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_ARGMAXANDMINWITHVALUE_GPU_KERNEL_H_
19 
20 #include <vector>
21 #include <string>
22 #include <map>
23 #include "backend/kernel_compiler/gpu/gpu_kernel.h"
24 #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
25 #include "backend/kernel_compiler/gpu/cuda_impl/general_reduction_impl.cuh"
26 namespace mindspore {
27 namespace kernel {
28 template <typename T, typename S>
29 class ArgMaxAndMinWithValueGpuKernel : public GpuKernel {
30  public:
ArgMaxAndMinWithValueGpuKernel()31   ArgMaxAndMinWithValueGpuKernel() { ResetResource(); }
32   ~ArgMaxAndMinWithValueGpuKernel() override = default;
33 
GetInputSizeList()34   const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
GetOutputSizeList()35   const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
GetWorkspaceSizeList()36   const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
37 
Launch(const std::vector<AddressPtr> & inputs,const std::vector<AddressPtr> &,const std::vector<AddressPtr> & outputs,void * stream_ptr)38   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
39               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
40     if (is_null_input_) {
41       return true;
42     }
43     T *input = GetDeviceAddress<T>(inputs, 0);
44     T *output = GetDeviceAddress<T>(outputs, 1);
45     S *index = GetDeviceAddress<S>(outputs, 0);
46     CalGeneralReduction(small_, input, bound_, outerSize_, innerSize_, index, output,
47                         reinterpret_cast<cudaStream_t>(stream_ptr));
48     return true;
49   }
50 
Init(const CNodePtr & kernel_node)51   bool Init(const CNodePtr &kernel_node) override {
52     std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
53     small_ = (kernel_name == "ArgMinWithValue") ? true : false;
54     std::vector<size_t> shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
55     auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 1);
56     is_null_input_ = CHECK_NULL_INPUT(shape) || CHECK_NULL_INPUT(output_shape);
57     if (is_null_input_) {
58       MS_LOG(WARNING) << "For 'ArgmaxwithvalueGpuKernel', input or output is null.";
59       InitSizeLists();
60       return true;
61     }
62     int64_t dims = SizeToLong(shape.size());
63     int64_t axis = GetAttr<int64_t>(kernel_node, "axis");
64     if (axis < -dims || axis >= dims) {
65       MS_LOG(ERROR) << "axis must be in the range [-rank, rank)";
66       return false;
67     }
68     if (axis < 0) {
69       axis += dims;
70     }
71     input_size_ = sizeof(T);
72     for (auto x : shape) {
73       input_size_ *= x;
74     }
75     output_size_ = sizeof(S);
76     for (auto x : output_shape) {
77       output_size_ *= x;
78     }
79     bound_ = static_cast<S>(shape[axis]);
80     if (shape[axis] != static_cast<size_t>(bound_)) {
81       MS_LOG(EXCEPTION) << "bound's shape is larger than index type and overflows when casting.";
82     }
83     outerSize_ = 1;
84     for (int64_t i = axis - 1; i >= 0; i--) {
85       outerSize_ *= shape[i];
86     }
87     innerSize_ = 1;
88     for (int64_t i = axis + 1; i < dims; i++) {
89       innerSize_ *= shape[i];
90     }
91     InitSizeLists();
92     return true;
93   }
94 
ResetResource()95   void ResetResource() noexcept override {
96     input_size_ = 0;
97     output_size_ = 0;
98     bound_ = 0;
99     outerSize_ = 0;
100     innerSize_ = 0;
101     is_null_input_ = false;
102     input_size_list_.clear();
103     output_size_list_.clear();
104     workspace_size_list_.clear();
105   }
106 
107  protected:
InitSizeLists()108   void InitSizeLists() override {
109     input_size_list_.push_back(input_size_);
110     output_size_list_.push_back(output_size_);
111     output_size_list_.push_back(output_size_ / sizeof(S) * sizeof(T));
112   }
113 
114  private:
115   bool small_ = false;
116   size_t input_size_;
117   size_t output_size_;
118   std::vector<size_t> input_size_list_;
119   std::vector<size_t> output_size_list_;
120   std::vector<size_t> workspace_size_list_;
121   S bound_;
122   size_t outerSize_;
123   size_t innerSize_;
124   bool is_null_input_;
125 };
126 }  // namespace kernel
127 }  // namespace mindspore
128 
129 #endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_ARGMAXANDMINWITHVALUE_GPU_KERNEL_H_
130