/**
 * Copyright 2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "kernel/ops_utils.h"
#include <algorithm>
#include <bitset>
#include <cmath>
#include <utility>

namespace mindspore {
namespace kernel {
namespace {
constexpr auto kStridedSliceMaxDims = 8;
}  // namespace

std::vector<bool> Dec2Bin(const int64_t &mask) {
  auto mask_str = std::bitset<kStridedSliceMaxDims>(mask).to_string();
  size_t dim_idx = 0;
  std::vector<bool> result(kStridedSliceMaxDims, false);
  for (auto iter = mask_str.rbegin(); iter != mask_str.rend(); ++iter) {
    if (*iter == '1') {
      result[dim_idx] = true;
    }
    ++dim_idx;
  }
  return result;
}
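// Illustrative example (not from the original source): with kStridedSliceMaxDims == 8,
// Dec2Bin(0b00000101) yields {true, false, true, false, false, false, false, false},
// i.e. bit i of the mask marks dimension i. Bits above the 8th are dropped by the bitset.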

void FillEmptyDims(const std::string &kernel_name, std::vector<int64_t> *begin, std::vector<int64_t> *end,
                   std::vector<int64_t> *stride, ShapeVector *input_shape, bool is_gpu_strided) {
  std::vector<int64_t> &_begin = *begin;
  std::vector<int64_t> &_end = *end;
  std::vector<int64_t> &_stride = *stride;
  auto &_input_shape = *input_shape;
  if (_begin.size() != _end.size() || _begin.size() != _stride.size() || _begin.size() > _input_shape.size()) {
    MS_LOG(EXCEPTION) << "For '" << kernel_name
                      << "', the length of 'begin', 'stride' and 'end' should be equal "
                         "and less than or equal to the dimension of 'input_x', but got the length of 'begin': "
                      << _begin.size() << ", the length of 'stride': " << _stride.size()
                      << ", the length of 'end': " << _end.size()
                      << ", the dimension of 'input_x': " << _input_shape.size();
  }

  for (size_t i = 0; i < kStridedSliceMaxDims; i++) {
    if (i >= _input_shape.size()) {
      _input_shape.push_back(1);
    }

    if (i < _begin.size()) {
      int64_t dim = _input_shape[i];
      if (is_gpu_strided) {
        // The GPU kernel addresses the flattened tensor by offset, so clamp begin to [0, dim - 1].
        _begin[i] = std::min(_begin[i] < 0 ? std::max(_begin[i] + dim, static_cast<int64_t>(0)) : _begin[i], dim - 1);
      } else {
        // On CPU, begin is clamped to at most dim (not dim - 1): if begin ends up no smaller than end,
        // the slicing loop simply does not run and yields an empty slice.
        _begin[i] = std::min(_begin[i] < 0 ? std::max(_begin[i] + dim, static_cast<int64_t>(0)) : _begin[i], dim);
      }
    } else {
      _begin.push_back(0);
    }

    if (i < _end.size()) {
      int64_t dim = _input_shape[i];
      _end[i] = std::max(_end[i] < 0 ? _end[i] + dim : std::min(_end[i], dim), static_cast<int64_t>(-1));
    } else {
      _end.push_back(i < _input_shape.size() ? _input_shape[i] : 1);
    }

    if (i >= _stride.size()) {
      _stride.push_back(1);
    }
  }
}
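// Illustrative example (not from the original source): for input_shape {4}, begin {-2}, end {-1},
// stride {1} and is_gpu_strided == true, the call normalizes the values to
// input_shape {4, 1, 1, 1, 1, 1, 1, 1}, begin {2, 0, ...}, end {3, 1, ...}, stride {1, 1, ...}:
// negative indices wrap around and every vector is padded out to kStridedSliceMaxDims entries.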

void ComputeBeginMask(std::vector<int64_t> *begin, const std::vector<int64_t> &stride, const ShapeVector &input_shape,
                      KernelTensor *kernel_tensor) {
  std::vector<int64_t> &_begin = *begin;
  auto begin_mask_int = kernel_tensor->GetValueWithCheck<int64_t>();
  auto begin_mask = Dec2Bin(begin_mask_int);
  for (size_t i = 0; i < begin_mask.size(); i++) {
    if (i < kStridedSliceMaxDims && begin_mask[i]) {
      _begin[i] = stride[i] < 0 ? input_shape[i] - 1 : 0;
    }
  }
}
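// Illustrative example (not from the original source): with begin_mask == 0b10 and a positive
// stride, dimension 1 starts from index 0 regardless of the caller-supplied begin value; with a
// negative stride it starts from input_shape[1] - 1 instead.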

void ComputeEndMask(std::vector<int64_t> *end, const std::vector<int64_t> &stride, const ShapeVector &input_shape,
                    KernelTensor *kernel_tensor) {
  std::vector<int64_t> &_end = *end;
  auto end_mask_int = kernel_tensor->GetValueWithCheck<int64_t>();
  auto end_mask = Dec2Bin(end_mask_int);
  for (size_t j = 0; j < end_mask.size(); j++) {
    if (j < kStridedSliceMaxDims && end_mask[j]) {
      _end[j] = stride[j] < 0 ? -1 : input_shape[j];
    }
  }
}

void ComputeEllipsisMask(std::vector<int64_t> *begin, std::vector<int64_t> *end, std::vector<int64_t> *stride,
                         const ShapeVector &input_shape, KernelTensor *kernel_tensor) {
  std::vector<int64_t> &_begin = *begin;
  std::vector<int64_t> &_end = *end;
  std::vector<int64_t> &_stride = *stride;
  auto ellipsis_mask_int = kernel_tensor->GetValueWithCheck<int64_t>();
  auto ellipsis_mask = Dec2Bin(ellipsis_mask_int);
  for (size_t k = 0; k < ellipsis_mask.size(); k++) {
    if (k < kStridedSliceMaxDims && ellipsis_mask[k]) {
      _begin[k] = 0;
      _end[k] = input_shape[k];
      _stride[k] = 1;
    }
  }
}

void ComputNewAxisMask(std::vector<int64_t> *begin, std::vector<int64_t> *end, std::vector<int64_t> *stride,
                       const ShapeVector &input_shape, KernelTensor *kernel_tensor) {
  std::vector<int64_t> &_begin = *begin;
  std::vector<int64_t> &_end = *end;
  std::vector<int64_t> &_stride = *stride;
  auto new_axis_mask_int = kernel_tensor->GetValueWithCheck<int64_t>();
  auto new_axis_mask = Dec2Bin(new_axis_mask_int);
  for (size_t l = 0; l < new_axis_mask.size(); l++) {
    if (l < kStridedSliceMaxDims && new_axis_mask[l]) {
      _begin[l] = 0;
      _end[l] = input_shape[l];
      _stride[l] = 1;
    }
  }
}

void ComputeShrinkAxisMask(const std::vector<int64_t> &begin, std::vector<int64_t> *end, std::vector<int64_t> *stride,
                           KernelTensor *kernel_tensor) {
  std::vector<int64_t> &_end = *end;
  std::vector<int64_t> &_stride = *stride;
  auto shrink_axis_mask_int = kernel_tensor->GetValueWithCheck<int64_t>();
  auto shrink_axis_mask = Dec2Bin(shrink_axis_mask_int);
  for (size_t m = 0; m < shrink_axis_mask.size(); m++) {
    if (m < kStridedSliceMaxDims && shrink_axis_mask[m]) {
      _end[m] = _end[m] > begin[m] ? begin[m] + 1 : begin[m] - 1;
      _stride[m] = _end[m] > begin[m] ? 1 : -1;
    }
  }
}
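// Note (added for clarity): a set shrink-axis bit forces the slice along that dimension to cover
// exactly one element, by moving end to begin +/- 1 and setting the stride to +/- 1 accordingly.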

void ParseStrideSliceMasks(const std::vector<kernel::KernelTensor *> &inputs, std::vector<int64_t> *begin,
                           std::vector<int64_t> *end, std::vector<int64_t> *stride, const ShapeVector &input_shape) {
  ComputeBeginMask(begin, *stride, input_shape, inputs[kIndex4]);
  ComputeEndMask(end, *stride, input_shape, inputs[kIndex5]);
  ComputeEllipsisMask(begin, end, stride, input_shape, inputs[kIndex6]);
  ComputNewAxisMask(begin, end, stride, input_shape, inputs[kIndex7]);
  ComputeShrinkAxisMask(*begin, end, stride, inputs[kIndex8]);
}

// =========================== Old interface ==========================================================
void FillEmptyDims(const BaseOperatorPtr &base_operator, std::vector<int64_t> *begin, std::vector<int64_t> *end,
                   std::vector<int64_t> *stride, ShapeVector *input_shape, bool is_gpu_strided) {
  std::vector<int64_t> &_begin = *begin;
  std::vector<int64_t> &_end = *end;
  std::vector<int64_t> &_stride = *stride;
  auto &_input_shape = *input_shape;
  if (_begin.size() != _end.size() || _begin.size() != _stride.size() || _begin.size() > _input_shape.size()) {
    MS_LOG(EXCEPTION) << "For '" << base_operator->name()
                      << "', the length of 'begin', 'stride' and 'end' should be equal "
                         "and less than or equal to the dimension of 'input_x', but got the length of 'begin': "
                      << _begin.size() << ", the length of 'stride': " << _stride.size()
                      << ", the length of 'end': " << _end.size()
                      << ", the dimension of 'input_x': " << _input_shape.size();
  }

  for (size_t i = 0; i < kStridedSliceMaxDims; i++) {
    if (i >= _input_shape.size()) {
      _input_shape.push_back(1);
    }

    if (i < _begin.size()) {
      int64_t dim = _input_shape[i];
      if (is_gpu_strided) {
        // The GPU kernel addresses the flattened tensor by offset, so clamp begin to [0, dim - 1].
        _begin[i] = std::min(_begin[i] < 0 ? std::max(_begin[i] + dim, static_cast<int64_t>(0)) : _begin[i], dim - 1);
      } else {
        // On CPU, begin is clamped to at most dim (not dim - 1): if begin ends up no smaller than end,
        // the slicing loop simply does not run and yields an empty slice.
        _begin[i] = std::min(_begin[i] < 0 ? std::max(_begin[i] + dim, static_cast<int64_t>(0)) : _begin[i], dim);
      }
    } else {
      _begin.push_back(0);
    }

    if (i < _end.size()) {
      int64_t dim = _input_shape[i];
      _end[i] = std::max(_end[i] < 0 ? _end[i] + dim : std::min(_end[i], dim), static_cast<int64_t>(-1));
    } else {
      _end.push_back(i < _input_shape.size() ? _input_shape[i] : 1);
    }

    if (i >= _stride.size()) {
      _stride.push_back(1);
    }
  }
}

float Scaling(size_t in_size, size_t out_size, bool align_corners) {
  return (align_corners && out_size > 1) ? SizeToFloat(in_size - 1) / static_cast<float>(out_size - 1)
                                         : SizeToFloat(in_size) / static_cast<float>(out_size);
}
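// Illustrative example (not from the original source): Scaling(4, 2, false) == 2.0f, while
// Scaling(4, 2, true) == 3.0f, because align_corners maps the first and last output pixels onto
// the first and last input pixels and stretches (in_size - 1) over (out_size - 1) steps.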

float ScaleGrid(const int x, const float scale, bool half_pixel_centers) {
  if (half_pixel_centers) {
    return (static_cast<float>(x) + 0.5f) * scale - 0.5f;
  } else {
    return static_cast<float>(x) * scale;
  }
}

void ComputeInterpolationWeights(const size_t out_size, const size_t in_size, const float scale,
                                 CachedInterpolation *interpolation, bool half_pixel_centers) {
  interpolation[out_size].lower = 0;
  interpolation[out_size].upper = 0;
  for (size_t i = 0; i < out_size; ++i) {
    const float in = ScaleGrid(SizeToInt(i), scale, half_pixel_centers);
    const float in_f = std::floor(in);
    interpolation[i].lower = std::max(static_cast<int64_t>(in_f), static_cast<int64_t>(0));
    interpolation[i].upper = std::min(static_cast<int64_t>(std::ceil(in)), static_cast<int64_t>(in_size - 1));
    interpolation[i].lerp = in - in_f;
  }
}
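// Illustrative example (not from the original source): for out_size == 2, in_size == 4,
// scale == 2.0f and half_pixel_centers == true, output pixel 0 maps to the source coordinate
// (0 + 0.5) * 2 - 0.5 = 0.5, so interpolation[0] holds lower == 0, upper == 1 and lerp == 0.5,
// i.e. an even blend of the two neighbouring input pixels.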

void CheckSliceValid(const std::vector<int64_t> &start, const std::vector<int64_t> &stop,
                     const std::vector<int64_t> &step, const std::vector<int64_t> &input_shape) {
  if (start.size() != stop.size() || start.size() != step.size() || start.size() > input_shape.size()) {
    MS_LOG(EXCEPTION)
      << "TensorCopySlices requires the lengths of begin, stride and end to be equal and not exceed the input dimension.";
  }

  size_t size = start.size();
  for (size_t i = 0; i < size; ++i) {
    if (stop[i] <= start[i]) {
      MS_LOG(EXCEPTION) << "Invalid slice: (" << start[i] << ", " << stop[i] << ", " << step[i] << ")";
    }
    // The operator needs to be generalized in the future. Only copying continuous memory is supported now.
    if (step[i] != 1) {
      MS_LOG(EXCEPTION) << "Each element in step only supports 1, but got: " << step;
    }
  }

  size_t slice_pos = size;
  for (size_t i = 0; i < size; ++i) {
    if (stop[i] - start[i] > 1) {
      slice_pos = i;
      break;
    }
  }

  for (size_t i = slice_pos + 1; i < size; ++i) {
    if (stop[i] - start[i] != input_shape[i]) {
      MS_LOG(EXCEPTION) << "Only copying continuous memory is supported now. For example, tensor[0, 0:100] is fine, "
                           "but tensor[0:100, 0] is not supported.";
    }
  }
}

size_t GetCopySize(const std::vector<int64_t> &dim_offset, const std::vector<int64_t> &start,
                   const std::vector<int64_t> &stop) {
  for (size_t i = 0; i < start.size(); ++i) {
    if (stop[i] - start[i] != 1) {
      return SizetMulWithOverflowCheck(LongToSize(stop[i] - start[i]), LongToSize(dim_offset[i]));
    }
  }
  return LongToSize(dim_offset[start.size() - 1]);
}

std::vector<int64_t> CalDimOffset(const std::vector<int64_t> &input_shape) {
  std::vector<int64_t> dim_offset;
  int64_t offset = 1;
  for (auto iter = input_shape.rbegin(); iter != input_shape.rend(); ++iter) {
    dim_offset.push_back(offset);
    offset = offset * (*iter);
  }
  std::reverse(dim_offset.begin(), dim_offset.end());
  return dim_offset;
}

size_t CalOffset(const std::vector<int64_t> &start, const std::vector<int64_t> &stop,
                 const std::vector<int64_t> &dim_offset) {
  size_t size = start.size();
  size_t offset = 0;
  for (size_t i = 0; i < size; ++i) {
    offset += SizetMulWithOverflowCheck(LongToSize(dim_offset[i]), LongToSize(start[i]));
    if (stop[i] - start[i] != 1) {
      break;
    }
  }
  return offset;
}
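// Illustrative example (not from the original source): for input_shape {2, 3, 4},
// CalDimOffset returns {12, 4, 1}. With start {1, 0, 0} and stop {2, 3, 4} (i.e. tensor[1, :, :]),
// CalOffset gives an offset of 1 * 12 = 12 elements and GetCopySize gives 3 * 4 = 12 elements to copy,
// so the slice covers the contiguous flat range [12, 24).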

std::pair<MatrixDiag::Alignment, MatrixDiag::Alignment> GetAlignments(const std::string &alignment) {
  static const mindspore::HashMap<std::string, std::pair<MatrixDiag::Alignment, MatrixDiag::Alignment>> AlignmentMap{
    {"RIGHT_LEFT", {MatrixDiag::RIGHT, MatrixDiag::LEFT}},
    {"LEFT_RIGHT", {MatrixDiag::LEFT, MatrixDiag::RIGHT}},
    {"RIGHT_RIGHT", {MatrixDiag::RIGHT, MatrixDiag::RIGHT}},
    {"LEFT_LEFT", {MatrixDiag::LEFT, MatrixDiag::LEFT}}};

  auto alignment_iter = AlignmentMap.find(alignment);
  if (alignment_iter == AlignmentMap.end()) {
    MS_LOG(INTERNAL_EXCEPTION) << "For the current kernel, the input alignment is invalid: " << alignment
                               << ". Please limit it to {RIGHT_LEFT, LEFT_RIGHT, RIGHT_RIGHT, LEFT_LEFT}";
  }
  return alignment_iter->second;
}
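// Illustrative example (not from the original source): GetAlignments("RIGHT_LEFT") returns the pair
// {MatrixDiag::RIGHT, MatrixDiag::LEFT}; the string splits into one alignment token per element of
// the returned pair, and any other string raises an internal exception.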

namespace broadcast_utils {
bool AlignedBroadCastShape(size_t align_rank, std::vector<size_t> *broadcast, std::vector<size_t> *lhs,
                           std::vector<size_t> *rhs) {
  if (broadcast == nullptr || lhs == nullptr || rhs == nullptr) {
    MS_LOG(ERROR) << "The input is nullptr.";
    return false;
  }
  size_t broadcast_rank = broadcast->size();
  size_t l_rank = lhs->size();
  size_t r_rank = rhs->size();
  if (broadcast_rank > align_rank || l_rank > align_rank || r_rank > align_rank) {
    return false;
  }
  std::vector<size_t> aligned_broadcast(align_rank, 1);
  std::vector<size_t> aligned_lhs(align_rank, 1);
  std::vector<size_t> aligned_rhs(align_rank, 1);
  size_t broadcast_offset = align_rank - broadcast_rank;
  for (size_t i = 0; i < broadcast_rank; i++) {
    aligned_broadcast[i + broadcast_offset] = (*broadcast)[i];
  }

  size_t l_offset = align_rank - l_rank;
  for (size_t i = 0; i < l_rank; i++) {
    aligned_lhs[i + l_offset] = (*lhs)[i];
  }
  size_t r_offset = align_rank - r_rank;
  for (size_t i = 0; i < r_rank; i++) {
    aligned_rhs[i + r_offset] = (*rhs)[i];
  }
  *broadcast = aligned_broadcast;
  *lhs = aligned_lhs;
  *rhs = aligned_rhs;
  return true;
}
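// Illustrative example (not from the original source): with align_rank == 4, broadcast {2, 3},
// lhs {3} and rhs {2, 3}, the shapes become {1, 1, 2, 3}, {1, 1, 1, 3} and {1, 1, 2, 3}:
// every shape is right-aligned to the requested rank and padded with leading 1s.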
}  // namespace broadcast_utils
}  // namespace kernel
}  // namespace mindspore