• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020-2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "backend/kernel_compiler/cpu/sparse_apply_proximal_adagrad_cpu_kernel.h"
18 #include "backend/kernel_compiler/common_utils.h"
19 #include "runtime/device/cpu/cpu_device_address.h"
20 
21 namespace mindspore {
22 namespace kernel {
23 namespace {
// Expected number of entries in the `inputs` vector handed to Launch.
constexpr size_t kSparseApplyProximalAdagradInputsNum{7};
// Expected number of entries in the `workspace` vector handed to Launch.
constexpr size_t kSparseApplyProximalAdagradWorkspaceSize{4};
26 
27 template <typename T>
ComputeProximalAdagrad(MultiThreadComputeParams<T> * input_params,size_t start,size_t end)28 void ComputeProximalAdagrad(MultiThreadComputeParams<T> *input_params, size_t start, size_t end) {
29   MS_EXCEPTION_IF_NULL(input_params);
30   auto var = input_params->var_;
31   auto accum = input_params->accum_;
32   const auto lr = input_params->lr_;
33   const auto l1 = input_params->l1_;
34   const auto l2 = input_params->l2_;
35   const auto unique_sparse_grad = input_params->sparse_grad_;
36   const auto var_first_dim_size = input_params->var_first_dim_size_;
37   const auto var_outer_dim_size = input_params->var_outer_dim_size_;
38   for (size_t i = start; i < end; ++i) {
39     T index = unique_sparse_grad.indices_[i];
40     if (index < 0 || LongToSize(index) >= var_first_dim_size) {
41       MS_LOG(EXCEPTION) << "Index " << index << " in indices is out of range after unique process";
42     }
43     size_t start_index = var_outer_dim_size * static_cast<size_t>(index);
44     size_t end_index = start_index + var_outer_dim_size;
45     for (size_t j = start_index, k = var_outer_dim_size * i; j < end_index; ++j, ++k) {
46       auto summed_grad = unique_sparse_grad.value_[k];
47       accum[j] += summed_grad * summed_grad;
48       auto learning_rate = lr * (1 / std::sqrt(accum[j]));
49       auto prox_v = var[j];
50       prox_v -= summed_grad * learning_rate;
51       if (l1 > 0) {
52         var[j] = Sign(prox_v) * std::fmax(std::fabs(prox_v) - learning_rate * l1, static_cast<float>(0.0)) /
53                  (1 + l2 * learning_rate);
54       } else {
55         var[j] = prox_v / (1 + l2 * learning_rate);
56       }
57     }
58   }
59 }
60 }  // namespace
61 
62 template <typename T>
InitWorkspaceSize()63 void SparseApplyProximalAdagradCPUKernel::InitWorkspaceSize() {
64   (void)workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float));
65   (void)workspace_size_list_.emplace_back(indices_size_ * sizeof(T));
66   (void)workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float));
67   (void)workspace_size_list_.emplace_back(indices_size_ * sizeof(T));
68 }
69 
InitInputOutputSize(const CNodePtr & kernel_node)70 void SparseApplyProximalAdagradCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) {
71   CPUKernel::InitInputOutputSize(kernel_node);
72   if (indices_data_type_ == kNumberTypeInt32) {
73     InitWorkspaceSize<int>();
74   } else if (indices_data_type_ == kNumberTypeInt64) {
75     InitWorkspaceSize<int64_t>();
76   } else {
77     MS_LOG(EXCEPTION) << "Input data type " << indices_data_type_ << " is unsupported";
78   }
79 }
80 
InitKernel(const CNodePtr & kernel_node)81 void SparseApplyProximalAdagradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
82   MS_EXCEPTION_IF_NULL(kernel_node);
83   kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
84   std::vector<size_t> var_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
85   std::vector<size_t> accum_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
86   std::vector<size_t> lr_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
87   std::vector<size_t> l1_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
88   std::vector<size_t> l2_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4);
89   std::vector<size_t> grad_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5);
90   std::vector<size_t> indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6);
91   if (var_shape.empty()) {
92     MS_LOG(EXCEPTION) << "var must be at least 1D";
93   }
94   if (!IsSameShape(var_shape, accum_shape)) {
95     MS_LOG(EXCEPTION) << "var and accum should have the same shape";
96   }
97   if (var_shape.size() != grad_shape.size()) {
98     MS_LOG(EXCEPTION) << "var and grad should have the same shape size";
99   }
100   var_first_dim_size_ = var_shape[0];
101   for (size_t i = 1; i < var_shape.size(); ++i) {
102     if (var_shape[i] != grad_shape[i]) {
103       MS_LOG(EXCEPTION) << "The shape of var and grad must equal in dimension " << i;
104     }
105     var_outer_dim_size_ *= var_shape[i];
106   }
107   if (indices_shape.size() != 1) {
108     MS_LOG(EXCEPTION) << "indices must be a 1D vector";
109   }
110   indices_size_ = indices_shape[0];
111   if (grad_shape[0] != indices_size_) {
112     MS_LOG(EXCEPTION) << "The first dimension of grad shape must be equal to indices";
113   }
114   if (!lr_shape.empty()) {
115     MS_LOG(EXCEPTION) << "lr is not a scalar";
116   }
117   if (!l1_shape.empty()) {
118     MS_LOG(EXCEPTION) << "l1 is not a scalar";
119   }
120   if (!l2_shape.empty()) {
121     MS_LOG(EXCEPTION) << "l2 is not a scalar";
122   }
123   indices_data_type_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 6);
124 }
125 
126 template <typename T>
LaunchKernel(const std::vector<kernel::AddressPtr> & inputs,const std::vector<kernel::AddressPtr> & workspace) const127 void SparseApplyProximalAdagradCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
128                                                        const std::vector<kernel::AddressPtr> &workspace) const {
129   auto var = reinterpret_cast<float *>(inputs[0]->addr);
130   auto accum = reinterpret_cast<float *>(inputs[1]->addr);
131   auto lr = reinterpret_cast<float *>(inputs[2]->addr)[0];
132   auto l1 = reinterpret_cast<float *>(inputs[3]->addr)[0];
133   auto l2 = reinterpret_cast<float *>(inputs[4]->addr)[0];
134   auto grad = reinterpret_cast<float *>(inputs[5]->addr);
135   auto indices = reinterpret_cast<T *>(inputs[6]->addr);
136   auto new_grad = reinterpret_cast<float *>(workspace[0]->addr);
137   auto new_indices = reinterpret_cast<T *>(workspace[1]->addr);
138   auto workspace_grad = reinterpret_cast<float *>(workspace[2]->addr);
139   auto workspace_indices = reinterpret_cast<T *>(workspace[3]->addr);
140 
141   SparseGradient<T> unique_sparse_grad({new_grad, new_indices, indices_size_});
142   SparseGradient<T> workspace_sparse_grad({workspace_grad, workspace_indices, indices_size_});
143   SparseGradient<T> input_sparse_grad({grad, indices, indices_size_});
144   ReduceSparseGradientParam<T> param;
145   param.input_grad_ = &input_sparse_grad;
146   param.workspace_grad_ = &workspace_sparse_grad;
147   param.output_grad_ = &unique_sparse_grad;
148   param.max_index_ = var_first_dim_size_;
149   param.value_stride_ = var_outer_dim_size_;
150   BucketReduceSparseGradient(param);
151 
152   MultiThreadComputeParams<T> input_params;
153   input_params.var_ = var;
154   input_params.accum_ = accum;
155   input_params.lr_ = lr;
156   input_params.l1_ = l1;
157   input_params.l2_ = l2;
158   input_params.sparse_grad_ = unique_sparse_grad;
159   input_params.var_first_dim_size_ = var_first_dim_size_;
160   input_params.var_outer_dim_size_ = var_outer_dim_size_;
161   MultiThreadCompute<T>(ComputeProximalAdagrad<T>, &input_params, unique_sparse_grad.indices_size_);
162 }
163 
Launch(const std::vector<kernel::AddressPtr> & inputs,const std::vector<kernel::AddressPtr> & workspace,const std::vector<kernel::AddressPtr> &)164 bool SparseApplyProximalAdagradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
165                                                  const std::vector<kernel::AddressPtr> &workspace,
166                                                  const std::vector<kernel::AddressPtr> &) {
167   CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSparseApplyProximalAdagradInputsNum, kernel_name_);
168   CHECK_KERNEL_WORKSPACE_SIZE(workspace.size(), kSparseApplyProximalAdagradWorkspaceSize, kernel_name_);
169   if (indices_data_type_ == kNumberTypeInt32) {
170     LaunchKernel<int>(inputs, workspace);
171   } else if (indices_data_type_ == kNumberTypeInt64) {
172     LaunchKernel<int64_t>(inputs, workspace);
173   } else {
174     MS_LOG(EXCEPTION) << "Unsupported indices data type: " << indices_data_type_;
175   }
176   return true;
177 }
178 }  // namespace kernel
179 }  // namespace mindspore
180