/**
 * Copyright 2020-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "backend/kernel_compiler/cpu/sparse_apply_adam_cpu_kernel.h"
#include <cmath>  // std::sqrt in ComputeWeight and LaunchKernel
#include "backend/kernel_compiler/common_utils.h"
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kSparseApplyAdamInputsNum = 11;
constexpr size_t kSparseApplyAdamWorkspaceSize = 5;

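// Accumulates the deduplicated sparse gradient rows into the first and second
// moment vectors for rows [start, end):
//   m[row] += (1 - beta1) * g
//   v[row] += (1 - beta2) * g * g
// With Nesterov momentum enabled, a lookahead moment is also kept:
//   m_t[row] = beta1 * m[row] + (1 - beta1) * g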
template <typename T>
void ComputeAdam(MultiThreadComputeParams<T> *input_params, size_t start, size_t end) {
  MS_EXCEPTION_IF_NULL(input_params);
  auto m = input_params->m_;
  auto m_t = input_params->m_t_;
  auto v = input_params->v_;
  const auto beta1 = input_params->beta1_;
  const auto beta2 = input_params->beta2_;
  const auto use_nesterov = input_params->use_nesterov_;
  const auto unique_sparse_grad = input_params->sparse_grad_;
  const auto var_first_dim_size = input_params->var_first_dim_size_;
  const auto var_outer_dim_size = input_params->var_outer_dim_size_;
  for (size_t i = start; i < end; ++i) {
    T index = unique_sparse_grad.indices_[i];
    if (index < 0 || LongToSize(index) >= var_first_dim_size) {
      MS_LOG(EXCEPTION) << "Index " << index << " in indices is out of range after unique process";
    }
    size_t start_index = var_outer_dim_size * static_cast<size_t>(index);
    size_t end_index = start_index + var_outer_dim_size;
    for (size_t j = start_index, k = var_outer_dim_size * i; j < end_index; ++j, ++k) {
      auto summed_grad = unique_sparse_grad.value_[k];
      m[j] += (1 - beta1) * summed_grad;
      v[j] += (1 - beta2) * summed_grad * summed_grad;
      if (use_nesterov) {
        m_t[j] = m[j] * beta1 + (1 - beta1) * summed_grad;
      }
    }
  }
}

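// Decays the dense moment vectors in place (m *= beta1, v *= beta2) over
// [start, end); this runs before ComputeAdam adds the sparse gradient.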
template <typename T>
void ComputeMomentum(MultiThreadComputeParams<T> *input_params, size_t start, size_t end) {
  MS_EXCEPTION_IF_NULL(input_params);
  auto m = input_params->m_;
  auto v = input_params->v_;
  const auto beta1 = input_params->beta1_;
  const auto beta2 = input_params->beta2_;
  for (size_t i = start; i < end; ++i) {
    m[i] *= beta1;
    v[i] *= beta2;
  }
}

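// Applies the dense Adam parameter update over [start, end):
//   var[i] -= lr * m[i] / (sqrt(v[i]) + epsilon)
// The caller is expected to have folded the bias correction into lr.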
template <typename T>
void ComputeWeight(MultiThreadComputeParams<T> *input_params, size_t start, size_t end) {
  MS_EXCEPTION_IF_NULL(input_params);
  auto var = input_params->var_;
  const auto *m = input_params->m_;
  const auto *v = input_params->v_;
  const auto lr = input_params->lr_;
  const auto epsilon = input_params->epsilon_;
  for (size_t i = start; i < end; ++i) {
    var[i] -= lr * m[i] / (std::sqrt(v[i]) + epsilon);
  }
}
}  // namespace

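// Workspace layout (consumed in LaunchKernel):
//   [0] deduplicated gradient values   [1] deduplicated indices
//   [2] scratch gradient values        [3] scratch indices
//   [4] m_t buffer for the Nesterov lookahead moment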
template <typename T>
void SparseApplyAdamCPUKernel::InitWorkspaceSize() {
  (void)workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float));
  (void)workspace_size_list_.emplace_back(indices_size_ * sizeof(T));
  (void)workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float));
  (void)workspace_size_list_.emplace_back(indices_size_ * sizeof(T));
  (void)workspace_size_list_.emplace_back(var_first_dim_size_ * var_outer_dim_size_ * sizeof(float));
}

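// The indices dtype decides sizeof(T) in the workspace buffers, so the
// matching instantiation is picked at init time.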
void SparseApplyAdamCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) {
  CPUKernel::InitInputOutputSize(kernel_node);
  if (indices_data_type_ == kNumberTypeInt32) {
    InitWorkspaceSize<int>();
  } else {
    InitWorkspaceSize<int64_t>();
  }
}

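// Input order: var(0), m(1), v(2), beta1_power(3), beta2_power(4), lr(5),
// beta1(6), beta2(7), epsilon(8), grad(9), indices(10).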
void SparseApplyAdamCPUKernel::InitKernel(const CNodePtr &kernel_node) {
  MS_EXCEPTION_IF_NULL(kernel_node);
  kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
  std::vector<size_t> var_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
  std::vector<size_t> m_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
  std::vector<size_t> v_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
  std::vector<size_t> grad_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 9);
  std::vector<size_t> indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 10);
  if (var_shape.empty()) {
    MS_LOG(EXCEPTION) << "var must be at least 1D";
  }
  if (!IsSameShape(var_shape, m_shape)) {
    MS_LOG(EXCEPTION) << "var and m should have the same shape";
  }
  if (!IsSameShape(var_shape, v_shape)) {
    MS_LOG(EXCEPTION) << "var and v should have the same shape";
  }
  if (var_shape.size() != grad_shape.size()) {
    MS_LOG(EXCEPTION) << "var and grad should have the same rank";
  }
  var_first_dim_size_ = var_shape[0];
  for (size_t i = 1; i < var_shape.size(); ++i) {
    if (var_shape[i] != grad_shape[i]) {
      MS_LOG(EXCEPTION) << "var and grad must have the same size in dimension " << i;
    }
    var_outer_dim_size_ *= var_shape[i];
  }
  if (indices_shape.size() != 1) {
    MS_LOG(EXCEPTION) << "indices must be 1D";
  }
  indices_size_ = indices_shape[0];
  if (grad_shape[0] != indices_size_) {
    MS_LOG(EXCEPTION) << "The first dimension of grad must equal the number of indices";
  }
  if (AnfAlgo::HasNodeAttr(USE_NESTEROV, kernel_node)) {
    use_nesterov_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, USE_NESTEROV);
  }
  indices_data_type_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 10);
}

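// Pipeline: deduplicate the sparse gradient with BucketReduceSparseGradient,
// decay m and v, accumulate the reduced gradient into the touched rows, then
// update var with the bias-corrected learning rate.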
template <typename T>
void SparseApplyAdamCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
                                            const std::vector<kernel::AddressPtr> &workspace) const {
  auto *var = reinterpret_cast<float *>(inputs[0]->addr);
  auto *m = reinterpret_cast<float *>(inputs[1]->addr);
  auto *v = reinterpret_cast<float *>(inputs[2]->addr);
  auto beta1_power = reinterpret_cast<float *>(inputs[3]->addr)[0];
  if (beta1_power == 1) {
    MS_LOG(EXCEPTION) << "beta1_power should not be 1, otherwise the bias-corrected lr divides by zero";
  }
  auto beta2_power = reinterpret_cast<float *>(inputs[4]->addr)[0];
  auto lr = reinterpret_cast<float *>(inputs[5]->addr)[0];
  auto beta1 = reinterpret_cast<float *>(inputs[6]->addr)[0];
  auto beta2 = reinterpret_cast<float *>(inputs[7]->addr)[0];
  auto epsilon = reinterpret_cast<float *>(inputs[8]->addr)[0];
  auto *grad = reinterpret_cast<float *>(inputs[9]->addr);
  auto *indices = reinterpret_cast<T *>(inputs[10]->addr);
  auto *new_grad = reinterpret_cast<float *>(workspace[0]->addr);
  auto *new_indices = reinterpret_cast<T *>(workspace[1]->addr);
  auto *workspace_grad = reinterpret_cast<float *>(workspace[2]->addr);
  auto *workspace_indices = reinterpret_cast<T *>(workspace[3]->addr);
  auto *m_t = reinterpret_cast<float *>(workspace[4]->addr);

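  // Merge duplicate indices so each parameter row is updated exactly once;
  // the summed gradient ends up in unique_sparse_grad.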
  SparseGradient<T> unique_sparse_grad({new_grad, new_indices, indices_size_});
  SparseGradient<T> workspace_sparse_grad({workspace_grad, workspace_indices, indices_size_});
  SparseGradient<T> input_sparse_grad({grad, indices, indices_size_});
  ReduceSparseGradientParam<T> param;
  param.input_grad_ = &input_sparse_grad;
  param.workspace_grad_ = &workspace_sparse_grad;
  param.output_grad_ = &unique_sparse_grad;
  param.max_index_ = var_first_dim_size_;
  param.value_stride_ = var_outer_dim_size_;
  BucketReduceSparseGradient(param);

  size_t total_dim_size = var_first_dim_size_ * var_outer_dim_size_;
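  // Fold the standard Adam bias correction into the learning rate:
  //   lr_t = lr * sqrt(1 - beta2^t) / (1 - beta1^t),
  // where beta1_power = beta1^t and beta2_power = beta2^t.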
  lr = lr * std::sqrt(1 - beta2_power) / (1 - beta1_power);

  MultiThreadComputeParams<T> input_params;
  input_params.m_ = m;
  input_params.v_ = v;
  input_params.beta1_ = beta1;
  input_params.beta2_ = beta2;
  // Step 1: decay every element of m and v.
  MultiThreadCompute<T>(ComputeMomentum<T>, &input_params, total_dim_size);
  input_params.m_t_ = m_t;
  input_params.use_nesterov_ = use_nesterov_;
  input_params.sparse_grad_ = unique_sparse_grad;
  input_params.var_first_dim_size_ = var_first_dim_size_;
  input_params.var_outer_dim_size_ = var_outer_dim_size_;
  // Step 2: accumulate the deduplicated gradient into the touched rows.
  MultiThreadCompute<T>(ComputeAdam<T>, &input_params, unique_sparse_grad.indices_size_);

  // Step 3: update var, reading the Nesterov lookahead moment when enabled.
  if (use_nesterov_) {
    input_params.m_ = input_params.m_t_;
  }
  input_params.var_ = var;
  input_params.lr_ = lr;
  input_params.epsilon_ = epsilon;
  MultiThreadCompute<T>(ComputeWeight<T>, &input_params, total_dim_size);
}

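// Validates the input/workspace counts, then dispatches on the indices dtype.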
bool SparseApplyAdamCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                      const std::vector<kernel::AddressPtr> &workspace,
                                      const std::vector<kernel::AddressPtr> &) {
  CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSparseApplyAdamInputsNum, kernel_name_);
  CHECK_KERNEL_WORKSPACE_SIZE(workspace.size(), kSparseApplyAdamWorkspaceSize, kernel_name_);
  if (indices_data_type_ == kNumberTypeInt32) {
    LaunchKernel<int>(inputs, workspace);
  } else if (indices_data_type_ == kNumberTypeInt64) {
    LaunchKernel<int64_t>(inputs, workspace);
  } else {
    MS_LOG(EXCEPTION) << "Unsupported indices data type: " << indices_data_type_;
  }
  return true;
}
}  // namespace kernel
}  // namespace mindspore