/**
 * Copyright 2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "kernel/ops_utils.h"
#include <utility>
#include <algorithm>
#include <bitset>
#include <cmath>

namespace mindspore {
namespace kernel {
namespace {
constexpr auto kStridedSliceMaxDims = 8;
} // namespace

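// Expands an integer bit mask into a per-dimension boolean vector of length kStridedSliceMaxDims;
// bit 0 (the least significant bit) maps to dimension 0.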
std::vector<bool> Dec2Bin(const int64_t &mask) {
  auto mask_str = std::bitset<kStridedSliceMaxDims>(mask).to_string();
  size_t dim_idx = 0;
  std::vector<bool> result(kStridedSliceMaxDims, false);
  for (auto iter = mask_str.rbegin(); iter != mask_str.rend(); ++iter) {
    if (*iter == '1') {
      result[dim_idx] = true;
    }
    ++dim_idx;
  }
  return result;
}

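// Normalizes 'begin', 'end' and 'stride' to kStridedSliceMaxDims entries and clamps them into the
// valid range of 'input_shape'; missing trailing dimensions are padded with defaults
// (begin 0, end dim, stride 1) and 'input_shape' itself is padded with 1s.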
void FillEmptyDims(const std::string &kernel_name, std::vector<int64_t> *begin, std::vector<int64_t> *end,
                   std::vector<int64_t> *stride, ShapeVector *input_shape, bool is_gpu_strided) {
  std::vector<int64_t> &_begin = *begin;
  std::vector<int64_t> &_end = *end;
  std::vector<int64_t> &_stride = *stride;
  auto &_input_shape = *input_shape;
  if (_begin.size() != _end.size() || _begin.size() != _stride.size() || _begin.size() > _input_shape.size()) {
    MS_LOG(EXCEPTION) << "For '" << kernel_name
                      << "', the length of 'begin', 'stride' and 'end' should be equal "
                         "and less than or equal to the dimension of 'input_x', but got the length of 'begin': "
                      << _begin.size() << ", the length of 'stride': " << _stride.size()
                      << ", the length of 'end': " << _end.size()
                      << ", the dimension of 'input_x': " << _input_shape.size();
  }

  for (size_t i = 0; i < kStridedSliceMaxDims; i++) {
    if (i >= _input_shape.size()) {
      _input_shape.push_back(1);
    }

    if (i < _begin.size()) {
      int64_t dim = _input_shape[i];
      if (is_gpu_strided) {
        // The GPU kernel addresses the flattened tensor by offset, so clamp 'begin' to [0, dim - 1].
        _begin[i] = std::min(_begin[i] < 0 ? std::max(_begin[i] + dim, static_cast<int64_t>(0)) : _begin[i], dim - 1);
      } else {
        // On CPU, 'begin' may equal 'dim'; when 'begin' is not smaller than 'end', the slicing loop simply exits.
        _begin[i] = std::min(_begin[i] < 0 ? std::max(_begin[i] + dim, static_cast<int64_t>(0)) : _begin[i], dim);
      }
    } else {
      _begin.push_back(0);
    }

    if (i < _end.size()) {
      int64_t dim = _input_shape[i];
      _end[i] = std::max(_end[i] < 0 ? _end[i] + dim : std::min(_end[i], dim), static_cast<int64_t>(-1));
    } else {
      _end.push_back(i < _input_shape.size() ? _input_shape[i] : 1);
    }

    if (i >= _stride.size()) {
      _stride.push_back(1);
    }
  }
}

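// Applies 'begin_mask': for every dimension whose mask bit is set, 'begin' is reset to the start of
// that dimension (or to the last element when the stride is negative).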
void ComputeBeginMask(std::vector<int64_t> *begin, const std::vector<int64_t> &stride, const ShapeVector &input_shape,
                      KernelTensor *kernel_tensor) {
  std::vector<int64_t> &_begin = *begin;
  auto begin_mask_int = kernel_tensor->GetValueWithCheck<int64_t>();
  auto begin_mask = Dec2Bin(begin_mask_int);
  for (size_t i = 0; i < begin_mask.size(); i++) {
    if (i < kStridedSliceMaxDims && begin_mask[i]) {
      _begin[i] = stride[i] < 0 ? input_shape[i] - 1 : 0;
    }
  }
}

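// Applies 'end_mask': for every dimension whose mask bit is set, 'end' is reset to the end of that
// dimension (or to -1, one before the first element, when the stride is negative).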
void ComputeEndMask(std::vector<int64_t> *end, const std::vector<int64_t> &stride, const ShapeVector &input_shape,
                    KernelTensor *kernel_tensor) {
  std::vector<int64_t> &_end = *end;
  auto end_mask_int = kernel_tensor->GetValueWithCheck<int64_t>();
  auto end_mask = Dec2Bin(end_mask_int);
  for (size_t j = 0; j < end_mask.size(); j++) {
    if (j < kStridedSliceMaxDims && end_mask[j]) {
      _end[j] = stride[j] < 0 ? -1 : input_shape[j];
    }
  }
}

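// Applies 'ellipsis_mask': every dimension whose mask bit is set is sliced in full,
// i.e. begin = 0, end = dim and stride = 1.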
void ComputeEllipsisMask(std::vector<int64_t> *begin, std::vector<int64_t> *end, std::vector<int64_t> *stride,
                         const ShapeVector &input_shape, KernelTensor *kernel_tensor) {
  std::vector<int64_t> &_begin = *begin;
  std::vector<int64_t> &_end = *end;
  std::vector<int64_t> &_stride = *stride;
  auto ellipsis_mask_int = kernel_tensor->GetValueWithCheck<int64_t>();
  auto ellipsis_mask = Dec2Bin(ellipsis_mask_int);
  for (size_t k = 0; k < ellipsis_mask.size(); k++) {
    if (k < kStridedSliceMaxDims && ellipsis_mask[k]) {
      _begin[k] = 0;
      _end[k] = input_shape[k];
      _stride[k] = 1;
    }
  }
}

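// Applies 'new_axis_mask': resets 'begin', 'end' and 'stride' so the marked dimensions select
// their full range.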
void ComputNewAxisMask(std::vector<int64_t> *begin, std::vector<int64_t> *end, std::vector<int64_t> *stride,
                       const ShapeVector &input_shape, KernelTensor *kernel_tensor) {
  std::vector<int64_t> &_begin = *begin;
  std::vector<int64_t> &_end = *end;
  std::vector<int64_t> &_stride = *stride;
  auto new_axis_mask_int = kernel_tensor->GetValueWithCheck<int64_t>();
  auto new_axis_mask = Dec2Bin(new_axis_mask_int);
  for (size_t l = 0; l < new_axis_mask.size(); l++) {
    if (l < kStridedSliceMaxDims && new_axis_mask[l]) {
      _begin[l] = 0;
      _end[l] = input_shape[l];
      _stride[l] = 1;
    }
  }
}

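// Applies 'shrink_axis_mask': every dimension whose mask bit is set is reduced to the single
// element at 'begin', with the stride direction chosen to step toward 'end'.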
void ComputeShrinkAxisMask(const std::vector<int64_t> &begin, std::vector<int64_t> *end, std::vector<int64_t> *stride,
                           KernelTensor *kernel_tensor) {
  std::vector<int64_t> &_end = *end;
  std::vector<int64_t> &_stride = *stride;
  auto shrink_axis_mask_int = kernel_tensor->GetValueWithCheck<int64_t>();
  auto shrink_axis_mask = Dec2Bin(shrink_axis_mask_int);
  for (size_t m = 0; m < shrink_axis_mask.size(); m++) {
    if (m < kStridedSliceMaxDims && shrink_axis_mask[m]) {
      _end[m] = _end[m] > begin[m] ? begin[m] + 1 : begin[m] - 1;
      _stride[m] = _end[m] > begin[m] ? 1 : -1;
    }
  }
}

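// Applies all StridedSlice masks in order (begin, end, ellipsis, new-axis, shrink-axis), reading
// each mask value from its corresponding kernel input.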
void ParseStrideSliceMasks(const std::vector<kernel::KernelTensor *> &inputs, std::vector<int64_t> *begin,
                           std::vector<int64_t> *end, std::vector<int64_t> *stride, const ShapeVector &input_shape) {
  ComputeBeginMask(begin, *stride, input_shape, inputs[kIndex4]);
  ComputeEndMask(end, *stride, input_shape, inputs[kIndex5]);
  ComputeEllipsisMask(begin, end, stride, input_shape, inputs[kIndex6]);
  ComputNewAxisMask(begin, end, stride, input_shape, inputs[kIndex7]);
  ComputeShrinkAxisMask(*begin, end, stride, inputs[kIndex8]);
}

// ===========================Old interface==========================================================
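// Legacy overload of FillEmptyDims that takes the operator instead of the kernel name; the
// normalization logic is identical to the overload above.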
void FillEmptyDims(const BaseOperatorPtr &base_operator, std::vector<int64_t> *begin, std::vector<int64_t> *end,
                   std::vector<int64_t> *stride, ShapeVector *input_shape, bool is_gpu_strided) {
  std::vector<int64_t> &_begin = *begin;
  std::vector<int64_t> &_end = *end;
  std::vector<int64_t> &_stride = *stride;
  auto &_input_shape = *input_shape;
  if (_begin.size() != _end.size() || _begin.size() != _stride.size() || _begin.size() > _input_shape.size()) {
    MS_LOG(EXCEPTION) << "For '" << base_operator->name()
                      << "', the length of 'begin', 'stride' and 'end' should be equal "
                         "and less than or equal to the dimension of 'input_x', but got the length of 'begin': "
                      << _begin.size() << ", the length of 'stride': " << _stride.size()
                      << ", the length of 'end': " << _end.size()
                      << ", the dimension of 'input_x': " << _input_shape.size();
  }

  for (size_t i = 0; i < kStridedSliceMaxDims; i++) {
    if (i >= _input_shape.size()) {
      _input_shape.push_back(1);
    }

    if (i < _begin.size()) {
      int64_t dim = _input_shape[i];
      if (is_gpu_strided) {
        // The GPU kernel addresses the flattened tensor by offset, so clamp 'begin' to [0, dim - 1].
        _begin[i] = std::min(_begin[i] < 0 ? std::max(_begin[i] + dim, static_cast<int64_t>(0)) : _begin[i], dim - 1);
      } else {
        // On CPU, 'begin' may equal 'dim'; when 'begin' is not smaller than 'end', the slicing loop simply exits.
        _begin[i] = std::min(_begin[i] < 0 ? std::max(_begin[i] + dim, static_cast<int64_t>(0)) : _begin[i], dim);
      }
    } else {
      _begin.push_back(0);
    }

    if (i < _end.size()) {
      int64_t dim = _input_shape[i];
      _end[i] = std::max(_end[i] < 0 ? _end[i] + dim : std::min(_end[i], dim), static_cast<int64_t>(-1));
    } else {
      _end.push_back(i < _input_shape.size() ? _input_shape[i] : 1);
    }

    if (i >= _stride.size()) {
      _stride.push_back(1);
    }
  }
}

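// Computes the input/output scale factor for resizing; with 'align_corners' the scale is
// (in_size - 1) / (out_size - 1) so that corner pixels map exactly onto each other.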
float Scaling(size_t in_size, size_t out_size, bool align_corners) {
  return (align_corners && out_size > 1) ? SizeToFloat(in_size - 1) / static_cast<float>(out_size - 1)
                                         : SizeToFloat(in_size) / static_cast<float>(out_size);
}

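// Maps an output coordinate to an input coordinate; with 'half_pixel_centers' the coordinate is
// shifted by half a pixel before and after scaling.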
float ScaleGrid(const int x, const float scale, bool half_pixel_centers) {
  if (half_pixel_centers) {
    return (static_cast<float>(x) + 0.5f) * scale - 0.5f;
  } else {
    return static_cast<float>(x) * scale;
  }
}

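// Precomputes, for each output position, the lower/upper input indices and the linear
// interpolation weight used by the linear-interpolation resize kernels; the sentinel entry at
// index 'out_size' is zeroed.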
void ComputeInterpolationWeights(const size_t out_size, const size_t in_size, const float scale,
                                 CachedInterpolation *interpolation, bool half_pixel_centers) {
  interpolation[out_size].lower = 0;
  interpolation[out_size].upper = 0;
  for (size_t i = 0; i < out_size; ++i) {
    const float in = ScaleGrid(SizeToInt(i), scale, half_pixel_centers);
    const float in_f = std::floor(in);
    interpolation[i].lower = std::max(static_cast<int64_t>(in_f), static_cast<int64_t>(0));
    interpolation[i].upper = std::min(static_cast<int64_t>(std::ceil(in)), static_cast<int64_t>(in_size - 1));
    interpolation[i].lerp = in - in_f;
  }
}

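// Validates the parameters of TensorCopySlices: the lengths must match, every step must be 1, and
// the selected region must be a contiguous block of memory.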
void CheckSliceValid(const std::vector<int64_t> &start, const std::vector<int64_t> &stop,
                     const std::vector<int64_t> &step, const std::vector<int64_t> &input_shape) {
  if (start.size() != stop.size() || start.size() != step.size() || start.size() > input_shape.size()) {
    MS_LOG(EXCEPTION)
      << "TensorCopySlices requires the lengths of begin, stride and end to be equal and not greater than the input "
         "dimension.";
  }

  size_t size = start.size();
  for (size_t i = 0; i < size; ++i) {
    if (stop[i] <= start[i]) {
      MS_LOG(EXCEPTION) << "Invalid slice: (" << start[i] << ", " << stop[i] << ", " << step[i] << ")";
    }
    // The operator needs to be generalized in the future. Only copying continuous memory is supported now.
    if (step[i] != 1) {
      MS_LOG(EXCEPTION) << "The elements in step only support 1, but got: " << step;
    }
  }

  size_t slice_pos = size;
  for (size_t i = 0; i < size; ++i) {
    if (stop[i] - start[i] > 1) {
      slice_pos = i;
      break;
    }
  }

  for (size_t i = slice_pos + 1; i < size; ++i) {
    if (stop[i] - start[i] != input_shape[i]) {
      MS_LOG(EXCEPTION) << "Only support copy continuous memory now. For example tensor[0, 0:100] is fine, "
                           "but tensor[0:100, 0] is not supported.";
    }
  }
}

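// Returns the size (in elements) of the contiguous region to copy: the first sliced dimension's
// extent times its offset, or the innermost offset when every dimension selects a single element.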
size_t GetCopySize(const std::vector<int64_t> &dim_offset, const std::vector<int64_t> &start,
                   const std::vector<int64_t> &stop) {
  for (size_t i = 0; i < start.size(); ++i) {
    if (stop[i] - start[i] != 1) {
      return SizetMulWithOverflowCheck(LongToSize(stop[i] - start[i]), LongToSize(dim_offset[i]));
    }
  }
  return LongToSize(dim_offset[start.size() - 1]);
}

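// Computes, for every dimension, the number of elements covered by one step along that dimension,
// i.e. the row-major strides of 'input_shape'.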
std::vector<int64_t> CalDimOffset(const std::vector<int64_t> &input_shape) {
  std::vector<int64_t> dim_offset;
  int64_t offset = 1;
  for (auto iter = input_shape.rbegin(); iter != input_shape.rend(); ++iter) {
    dim_offset.push_back(offset);
    offset = offset * (*iter);
  }
  std::reverse(dim_offset.begin(), dim_offset.end());
  return dim_offset;
}

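// Computes the flat element offset of the slice start point; accumulation stops after the first
// dimension that selects more than one element.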
size_t CalOffset(const std::vector<int64_t> &start, const std::vector<int64_t> &stop,
                 const std::vector<int64_t> &dim_offset) {
  size_t size = start.size();
  size_t offset = 0;
  for (size_t i = 0; i < size; ++i) {
    offset += SizetMulWithOverflowCheck(LongToSize(dim_offset[i]), LongToSize(start[i]));
    if (stop[i] - start[i] != 1) {
      break;
    }
  }
  return offset;
}

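// Maps an alignment string such as "RIGHT_LEFT" to the corresponding pair of
// MatrixDiag::Alignment values; an unknown string raises an internal exception.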
std::pair<MatrixDiag::Alignment, MatrixDiag::Alignment> GetAlignments(const std::string &alignment) {
  static const mindspore::HashMap<std::string, std::pair<MatrixDiag::Alignment, MatrixDiag::Alignment>> AlignmentMap{
    {"RIGHT_LEFT", {MatrixDiag::RIGHT, MatrixDiag::LEFT}},
    {"LEFT_RIGHT", {MatrixDiag::LEFT, MatrixDiag::RIGHT}},
    {"RIGHT_RIGHT", {MatrixDiag::RIGHT, MatrixDiag::RIGHT}},
    {"LEFT_LEFT", {MatrixDiag::LEFT, MatrixDiag::LEFT}}};

  auto alignment_iter = AlignmentMap.find(alignment);
  if (alignment_iter == AlignmentMap.end()) {
    MS_LOG(INTERNAL_EXCEPTION) << "For current kernel, input alignment is invalid: " << alignment
                               << ". Please limit it to {RIGHT_LEFT, LEFT_RIGHT, RIGHT_RIGHT, LEFT_LEFT}.";
  }
  return alignment_iter->second;
}

namespace broadcast_utils {
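// Left-pads 'broadcast', 'lhs' and 'rhs' with leading 1s so that all three shapes have exactly
// 'align_rank' dimensions; returns false on a null pointer or when any rank exceeds 'align_rank'.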
bool AlignedBroadCastShape(size_t align_rank, std::vector<size_t> *broadcast, std::vector<size_t> *lhs,
                           std::vector<size_t> *rhs) {
  if (broadcast == nullptr || lhs == nullptr || rhs == nullptr) {
    MS_LOG(ERROR) << "input is nullptr.";
    return false;
  }
  size_t broadcast_rank = broadcast->size();
  size_t l_rank = lhs->size();
  size_t r_rank = rhs->size();
  if (broadcast_rank > align_rank || l_rank > align_rank || r_rank > align_rank) {
    return false;
  }
  std::vector<size_t> aligned_broadcast(align_rank, 1);
  std::vector<size_t> aligned_lhs(align_rank, 1);
  std::vector<size_t> aligned_rhs(align_rank, 1);
  size_t broadcast_offset = align_rank - broadcast_rank;
  for (size_t i = 0; i < broadcast_rank; i++) {
    aligned_broadcast[i + broadcast_offset] = (*broadcast)[i];
  }

  size_t l_offset = align_rank - l_rank;
  for (size_t i = 0; i < l_rank; i++) {
    aligned_lhs[i + l_offset] = (*lhs)[i];
  }
  size_t r_offset = align_rank - r_rank;
  for (size_t i = 0; i < r_rank; i++) {
    aligned_rhs[i + r_offset] = (*rhs)[i];
  }
  *broadcast = aligned_broadcast;
  *lhs = aligned_lhs;
  *rhs = aligned_rhs;
  return true;
}
} // namespace broadcast_utils
} // namespace kernel
} // namespace mindspore