• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "backend/kernel_compiler/cpu/l2normalize_grad_cpu_kernel.h"
18 
19 namespace mindspore {
20 namespace kernel {
namespace {
// Operand counts for the L2NormalizeGrad kernel: three inputs (x, y, dout)
// and a single output (dx). Used to validate the node in InitKernel/Launch.
constexpr size_t kL2NormalizeGradInputsNum = 3;
constexpr size_t kL2NormalizeGradOutputsNum = 1;
}  // namespace
25 
26 template <typename T>
InitKernel(const CNodePtr & kernel_node)27 void L2NormalizeGradCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
28   MS_EXCEPTION_IF_NULL(kernel_node);
29   kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
30   for (size_t i = 0; i < kL2NormalizeGradInputsNum; i++) {
31     (void)input_shape_list_.emplace_back(AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i));
32   }
33   auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
34   CheckInputShape(output_shape);
35 
36   int output_dim_length = output_shape.size();
37   dim_elem_num_list_.resize(output_dim_length, 1);
38   for (int i = output_dim_length - 2; i >= 0; i--) {  // from -2 to 0 dim
39     dim_elem_num_list_[i] = output_shape[i + 1] * dim_elem_num_list_[i + 1];
40   }
41 
42   int axis = LongToInt(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "axis"));
43   int input_dim_length = SizeToInt(input_shape_list_[0].size());
44   axis_ = axis < 0 ? (axis + input_dim_length) : axis;
45   epsilon_ = static_cast<T>(AnfAlgo::GetNodeAttr<float>(kernel_node, "epsilon"));
46 }
47 
48 template <typename T>
Launch(const std::vector<AddressPtr> & inputs,const std::vector<AddressPtr> & workspace,const std::vector<AddressPtr> & outputs)49 bool L2NormalizeGradCPUKernel<T>::Launch(const std::vector<AddressPtr> &inputs,
50                                          const std::vector<AddressPtr> &workspace,
51                                          const std::vector<AddressPtr> &outputs) {
52   CHECK_KERNEL_INPUTS_NUM(inputs.size(), kL2NormalizeGradInputsNum, kernel_name_);
53   CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kL2NormalizeGradOutputsNum, kernel_name_);
54   auto input_x = reinterpret_cast<T *>(inputs[0]->addr);
55   auto y = reinterpret_cast<T *>(inputs[1]->addr);
56   auto dout = reinterpret_cast<T *>(inputs[2]->addr);
57   auto output = reinterpret_cast<T *>(outputs[0]->addr);
58   auto output_size = outputs[0]->size / sizeof(T);
59   auto task = [this, input_x, y, dout, output](size_t start, size_t end) {
60     for (size_t i = start; i < end; i++) {
61       std::vector<size_t> high_dim_index = OneDimIndexToHighDimIndex(i);
62       std::vector<T> input_x_vector = GetVector(high_dim_index, input_x);
63       std::vector<T> dout_vector = GetVector(high_dim_index, dout);
64       std::vector<T> y_vector = GetVector(high_dim_index, y);
65       GetOutput(input_x_vector, y_vector, dout_vector, high_dim_index, &output[i]);
66     }
67   };
68   CPUKernelUtils::ParallelFor(task, output_size);
69   return true;
70 }
71 
72 template <typename T>
CheckInputShape(const std::vector<size_t> & output_shape)73 void L2NormalizeGradCPUKernel<T>::CheckInputShape(const std::vector<size_t> &output_shape) {
74   for (const auto &shape : input_shape_list_) {
75     if (output_shape != shape) {
76       MS_LOG(EXCEPTION) << "Input shape and output shape should be same.";
77     }
78   }
79   auto input_x_shape = input_shape_list_[0];
80   if (input_x_shape.size() != 0) {
81     if (std::any_of(input_x_shape.begin(), input_x_shape.end(), [](size_t i) { return i == 0; })) {
82       MS_LOG(EXCEPTION) << "L2NormalizeCPUKernel input is null.";
83     }
84   }
85 }
86 
87 template <typename T>
OneDimIndexToHighDimIndex(size_t one_dim_index)88 std::vector<size_t> L2NormalizeGradCPUKernel<T>::OneDimIndexToHighDimIndex(size_t one_dim_index) {
89   std::vector<size_t> high_dim_index;
90   high_dim_index.reserve(dim_elem_num_list_.size());
91   for (const auto &item : dim_elem_num_list_) {
92     high_dim_index.push_back(one_dim_index / item);
93     one_dim_index %= item;
94   }
95   // referred to Copy elision https://en.cppreference.com/w/cpp/language/copy_elision
96   // returning a vector won't cause extra vector constructed or moved
97   return high_dim_index;
98 }
99 
100 template <typename T>
HighDimIndexToOneDimIndex(size_t * one_dim_index,const std::vector<size_t> & high_dim_index)101 void L2NormalizeGradCPUKernel<T>::HighDimIndexToOneDimIndex(size_t *one_dim_index,
102                                                             const std::vector<size_t> &high_dim_index) {
103   *one_dim_index = 0;
104   int len = high_dim_index.size();
105   for (int i = 0; i < len; i++) {
106     *one_dim_index += high_dim_index[i] * dim_elem_num_list_[i];
107   }
108 }
109 
110 template <typename T>
GetVector(const std::vector<size_t> & high_dim_index,const T * x)111 std::vector<T> L2NormalizeGradCPUKernel<T>::GetVector(const std::vector<size_t> &high_dim_index, const T *x) {
112   auto x_shape = input_shape_list_[0];
113   std::vector<T> x_vector;
114   x_vector.reserve(x_shape[axis_]);
115   for (size_t i = 0; i < x_shape[axis_]; i++) {
116     size_t oneDimIndex = 0;
117     std::vector<size_t> tmp_high_dim_index = high_dim_index;
118     tmp_high_dim_index[axis_] = i;
119     HighDimIndexToOneDimIndex(&oneDimIndex, tmp_high_dim_index);
120     (void)x_vector.emplace_back(x[oneDimIndex]);
121   }
122   // referred to Copy elision https://en.cppreference.com/w/cpp/language/copy_elision
123   // returning a vector won't cause extra vector constructed or moved
124   return x_vector;
125 }
126 
127 template <typename T>
GetSumOfProduct(const std::vector<T> & x_vector,const std::vector<T> & y_vector,T * ss)128 void L2NormalizeGradCPUKernel<T>::GetSumOfProduct(const std::vector<T> &x_vector, const std::vector<T> &y_vector,
129                                                   T *ss) {
130   size_t len = x_vector.size();
131   std::vector<T> tmp_vector(len);
132   for (size_t i = 0; i < len; i++) {
133     tmp_vector[i] = x_vector[i] * y_vector[i];
134   }
135   const size_t half = 2;
136   if (len % half == 1) {
137     tmp_vector[0] += tmp_vector[len - 1];
138   }
139   for (size_t stride = len / half; stride > 0; stride >>= 1) {
140     for (size_t i = 0; i < stride; i++) {
141       tmp_vector[i] += tmp_vector[i + stride];
142     }
143     if (stride > half && stride % half == 1) {
144       tmp_vector[0] += tmp_vector[stride - 1];
145     }
146   }
147   *ss = tmp_vector[0];
148 }
149 
150 template <typename T>
GetOutput(const std::vector<T> & input_x_vector,const std::vector<T> & y_vector,const std::vector<T> & dout_vector,const std::vector<size_t> & high_dim_index,T * output)151 void L2NormalizeGradCPUKernel<T>::GetOutput(const std::vector<T> &input_x_vector, const std::vector<T> &y_vector,
152                                             const std::vector<T> &dout_vector,
153                                             const std::vector<size_t> &high_dim_index, T *output) {
154   size_t axis_index = high_dim_index[axis_];
155   T dout = dout_vector[axis_index];
156   T y = y_vector[axis_index];
157   T tmp_sum1;
158   GetSumOfProduct(y_vector, dout_vector, &tmp_sum1);
159   T tmp_sum2;
160   GetSumOfProduct(input_x_vector, input_x_vector, &tmp_sum2);
161   tmp_sum2 = sqrt(tmp_sum2);
162   if (tmp_sum2 >= epsilon_) {
163     *output = (dout - y * tmp_sum1) / tmp_sum2;
164   } else {
165     *output = (dout - y * tmp_sum1) / epsilon_;
166   }
167 }
168 }  // namespace kernel
169 }  // namespace mindspore
170