• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020-2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "backend/kernel_compiler/cpu/sparse_apply_ftrl_cpu_kernel.h"
18 #include "backend/kernel_compiler/common_utils.h"
19 #include "runtime/device/cpu/cpu_device_address.h"
20 
21 namespace mindspore {
22 namespace kernel {
23 namespace {
24 constexpr size_t kSparseApplyFtrlInputsNum = 5;
25 constexpr size_t kSparseApplyFtrlWorkspaceSize = 4;
26 
27 template <typename T>
ComputeFtrl(MultiThreadComputeParams<T> * input_params,size_t start,size_t end)28 void ComputeFtrl(MultiThreadComputeParams<T> *input_params, size_t start, size_t end) {
29   MS_EXCEPTION_IF_NULL(input_params);
30   auto var = input_params->var_;
31   auto accum = input_params->accum_;
32   auto linear = input_params->linear_;
33   const auto lr = input_params->lr_;
34   const auto l1 = input_params->l1_;
35   const auto l2_plus = 2 * input_params->l2_;
36   const auto lr_power = input_params->lr_power_;
37   const auto unique_sparse_grad = input_params->sparse_grad_;
38   const auto var_first_dim_size = input_params->var_first_dim_size_;
39   const auto var_outer_dim_size = input_params->var_outer_dim_size_;
40   for (size_t i = start; i < end; ++i) {
41     T index = unique_sparse_grad.indices_[i];
42     if (index < 0 || LongToSize(index) >= var_first_dim_size) {
43       MS_LOG(EXCEPTION) << "Index " << index << " in indices is out of range after unique process";
44     }
45     size_t start_index = var_outer_dim_size * static_cast<size_t>(index);
46     size_t end_index = start_index + var_outer_dim_size;
47     for (size_t j = start_index, k = var_outer_dim_size * i; j < end_index; ++j, ++k) {
48       auto summed_grad = unique_sparse_grad.value_[k];
49       auto accum_new = accum[j] + summed_grad * summed_grad;
50       float y;
51       if (lr_power == -0.5) {
52         y = std::sqrt(accum_new);
53         linear[j] += summed_grad - (y - std::sqrt(accum[j])) / lr * var[j];
54       } else {
55         y = std::pow(accum_new, -lr_power);
56         linear[j] += summed_grad - (y - std::pow(accum[j], -lr_power)) / lr * var[j];
57       }
58       accum[j] = accum_new;
59       auto x = Sign(linear[j]) * l1 - linear[j];
60       y = y / lr + l2_plus;
61       var[j] = std::fabs(linear[j]) > l1 ? x / y : 0;
62     }
63   }
64 }
65 }  // namespace
66 
67 template <typename T>
InitWorkspaceSize()68 void SparseApplyFtrlCPUKernel::InitWorkspaceSize() {
69   (void)workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float));
70   (void)workspace_size_list_.emplace_back(indices_size_ * sizeof(T));
71   (void)workspace_size_list_.emplace_back(indices_size_ * var_outer_dim_size_ * sizeof(float));
72   (void)workspace_size_list_.emplace_back(indices_size_ * sizeof(T));
73 }
74 
InitInputOutputSize(const CNodePtr & kernel_node)75 void SparseApplyFtrlCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) {
76   CPUKernel::InitInputOutputSize(kernel_node);
77   if (indices_data_type_ == kNumberTypeInt32) {
78     InitWorkspaceSize<int>();
79   } else if (indices_data_type_ == kNumberTypeInt64) {
80     InitWorkspaceSize<int64_t>();
81   } else {
82     MS_LOG(EXCEPTION) << "Input data type " << indices_data_type_ << " is unsupported";
83   }
84 }
85 
InitKernel(const CNodePtr & kernel_node)86 void SparseApplyFtrlCPUKernel::InitKernel(const CNodePtr &kernel_node) {
87   MS_EXCEPTION_IF_NULL(kernel_node);
88   std::vector<size_t> var_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
89   std::vector<size_t> accum_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
90   std::vector<size_t> linear_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
91   std::vector<size_t> grad_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
92   std::vector<size_t> indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4);
93   if (var_shape.empty()) {
94     MS_LOG(EXCEPTION) << "var must be at least 1D";
95   }
96   if (!IsSameShape(var_shape, accum_shape)) {
97     MS_LOG(EXCEPTION) << "var and accum should have the same shape";
98   }
99   if (!IsSameShape(var_shape, linear_shape)) {
100     MS_LOG(EXCEPTION) << "var and linear should have the same shape";
101   }
102   if (var_shape.size() != grad_shape.size()) {
103     MS_LOG(EXCEPTION) << "var and grad should have the same shape size";
104   }
105 
106   var_first_dim_size_ = var_shape[0];
107   for (size_t i = 1; i < var_shape.size(); ++i) {
108     if (var_shape[i] != grad_shape[i]) {
109       MS_LOG(EXCEPTION) << "The shape of var and grad must equal in dimension " << i;
110     }
111     var_outer_dim_size_ *= var_shape[i];
112   }
113   if (indices_shape.size() != 1) {
114     MS_LOG(EXCEPTION) << "Indices must be a 1D vector!";
115   }
116   indices_size_ = indices_shape[0];
117   if (grad_shape[0] != indices_size_) {
118     MS_LOG(EXCEPTION) << "The first dimension of grad shape must be equal to indices";
119   }
120   lr_ = AnfAlgo::GetNodeAttr<float>(kernel_node, "lr");
121   if (lr_ <= 0) {
122     MS_LOG(EXCEPTION) << "lr should be a positive scalar";
123   }
124   l1_ = AnfAlgo::GetNodeAttr<float>(kernel_node, "l1");
125   if (l1_ < 0) {
126     MS_LOG(EXCEPTION) << "l1 should be a non-negative scalar";
127   }
128   l2_ = AnfAlgo::GetNodeAttr<float>(kernel_node, "l2");
129   if (l2_ < 0) {
130     MS_LOG(EXCEPTION) << "l2 should be a non-negative scalar";
131   }
132   lr_power_ = AnfAlgo::GetNodeAttr<float>(kernel_node, "lr_power");
133   if (lr_power_ > 0) {
134     MS_LOG(EXCEPTION) << "lr_power should be a non-positive scalar";
135   }
136   indices_data_type_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 4);
137 }
138 
139 template <typename T>
LaunchKernel(const std::vector<kernel::AddressPtr> & inputs,const std::vector<kernel::AddressPtr> & workspace) const140 void SparseApplyFtrlCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
141                                             const std::vector<kernel::AddressPtr> &workspace) const {
142   auto *var = reinterpret_cast<float *>(inputs[0]->addr);
143   auto *accum = reinterpret_cast<float *>(inputs[1]->addr);
144   auto *linear = reinterpret_cast<float *>(inputs[2]->addr);
145   auto *grad = reinterpret_cast<float *>(inputs[3]->addr);
146   auto *indices = reinterpret_cast<T *>(inputs[4]->addr);
147   auto *new_grad = reinterpret_cast<float *>(workspace[0]->addr);
148   auto *new_indices = reinterpret_cast<T *>(workspace[1]->addr);
149   auto *workspace_grad = reinterpret_cast<float *>(workspace[2]->addr);
150   auto *workspace_indices = reinterpret_cast<T *>(workspace[3]->addr);
151 
152   SparseGradient<T> unique_sparse_grad({new_grad, new_indices, indices_size_});
153   SparseGradient<T> workspace_sparse_grad({workspace_grad, workspace_indices, indices_size_});
154   SparseGradient<T> input_sparse_grad({grad, indices, indices_size_});
155   ReduceSparseGradientParam<T> param;
156   param.input_grad_ = &input_sparse_grad;
157   param.workspace_grad_ = &workspace_sparse_grad;
158   param.output_grad_ = &unique_sparse_grad;
159   param.max_index_ = var_first_dim_size_;
160   param.value_stride_ = var_outer_dim_size_;
161   BucketReduceSparseGradient(param);
162 
163   MultiThreadComputeParams<T> input_params;
164   input_params.var_ = var;
165   input_params.accum_ = accum;
166   input_params.linear_ = linear;
167   input_params.lr_ = lr_;
168   input_params.l1_ = l1_;
169   input_params.l2_ = l2_;
170   input_params.lr_power_ = lr_power_;
171   input_params.sparse_grad_ = unique_sparse_grad;
172   input_params.var_first_dim_size_ = var_first_dim_size_;
173   input_params.var_outer_dim_size_ = var_outer_dim_size_;
174   MultiThreadCompute<T>(ComputeFtrl<T>, &input_params, unique_sparse_grad.indices_size_);
175 }
176 
Launch(const std::vector<kernel::AddressPtr> & inputs,const std::vector<kernel::AddressPtr> & workspace,const std::vector<kernel::AddressPtr> &)177 bool SparseApplyFtrlCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
178                                       const std::vector<kernel::AddressPtr> &workspace,
179                                       const std::vector<kernel::AddressPtr> &) {
180   CHECK_KERNEL_INPUTS_NUM(inputs.size(), kSparseApplyFtrlInputsNum, kernel_name_);
181   CHECK_KERNEL_WORKSPACE_SIZE(workspace.size(), kSparseApplyFtrlWorkspaceSize, kernel_name_);
182   if (indices_data_type_ == kNumberTypeInt32) {
183     LaunchKernel<int>(inputs, workspace);
184   } else if (indices_data_type_ == kNumberTypeInt64) {
185     LaunchKernel<int64_t>(inputs, workspace);
186   } else {
187     MS_LOG(EXCEPTION) << "Unsupported indices data type: " << indices_data_type_;
188   }
189   return true;
190 }
191 }  // namespace kernel
192 }  // namespace mindspore
193