1 /**
2 * Copyright 2019 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
#include "minddata/dataset/core/tensor.h"

#include <algorithm>
#include <cstring>
#include <fstream>
#include <functional>
#include <iomanip>
#include <iostream>
#include <limits>
#include <memory>
#include <numeric>
#include <utility>
#include <vector>

#include "minddata/dataset/core/type_id.h"

#include "utils/ms_utils.h"
#include "minddata/dataset/include/dataset/constants.h"

#ifndef ENABLE_ANDROID
#include "minddata/dataset/core/cv_tensor.h"
#endif

#include "minddata/dataset/core/global_context.h"

#ifdef ENABLE_PYTHON
#include "minddata/dataset/core/pybind_support.h"
namespace py = pybind11;
#endif

#include "minddata/dataset/core/tensor_shape.h"
45
46 namespace mindspore {
47 namespace dataset {
// Helper macros for printing tensor elements.
// Both expect `rc` (Status), `index` (std::vector<dsize_t>) and `out` (std::ostream)
// to be in scope at the expansion site (see PrintItemAt).
// CASE_PRINT: read the element at `index` as `native_type` and stream it as-is.
#define CASE_PRINT(de_type, native_type)    \
  case de_type: {                           \
    native_type o;                          \
    rc = GetItemAt<native_type>(&o, index); \
    out << o;                               \
    break;                                  \
  }

// CASE_PRINT_HEX: same, but render as two-digit zero-padded hex, restoring the
// stream's decimal base and default fill afterwards (used for byte-sized types).
#define CASE_PRINT_HEX(de_type, native_type)                                                    \
  case de_type: {                                                                               \
    native_type o;                                                                              \
    rc = GetItemAt<native_type>(&o, index);                                                     \
    out << std::hex << std::setw(2) << std::setfill('0') << o << std::dec << std::setfill(' '); \
    break;                                                                                      \
  }
64
Tensor(const TensorShape & shape,const DataType & type)65 Tensor::Tensor(const TensorShape &shape, const DataType &type) : shape_(shape), type_(type), data_(nullptr) {
66 // grab the mem pool from global context and create the allocator for char data area
67 std::shared_ptr<MemoryPool> global_pool = GlobalContext::Instance()->mem_pool();
68 data_allocator_ = std::make_unique<Allocator<unsigned char>>(global_pool);
69 }
70
Tensor(Tensor && other)71 Tensor::Tensor(Tensor &&other) noexcept
72 : shape_(other.shape()),
73 type_(other.type()),
74 data_(other.GetMutableBuffer()),
75 data_end_(other.data_end_),
76 data_allocator_(std::move(other.data_allocator_)) {
77 other.Invalidate();
78 }
79
operator =(Tensor && other)80 Tensor &Tensor::operator=(Tensor &&other) noexcept {
81 if (&other != this) {
82 shape_ = other.shape();
83 type_ = other.type();
84 data_ = other.GetMutableBuffer();
85 data_end_ = other.data_end_;
86 data_allocator_ = std::move(other.data_allocator_);
87 other.Invalidate();
88 }
89 return *this;
90 }
CreateEmpty(const TensorShape & shape,const DataType & type,TensorPtr * out)91 Status Tensor::CreateEmpty(const TensorShape &shape, const DataType &type, TensorPtr *out) {
92 CHECK_FAIL_RETURN_UNEXPECTED(shape.known(), "Invalid shape.");
93 CHECK_FAIL_RETURN_UNEXPECTED(type != DataType::DE_UNKNOWN, "Invalid data type.");
94 RETURN_UNEXPECTED_IF_NULL(out);
95 const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
96 *out = std::allocate_shared<Tensor>(*alloc, shape, type);
97 CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory failed.");
98 // if it's a string tensor and it has no elements, Just initialize the shape and type.
99 if (!type.IsNumeric() && shape.NumOfElements() == 0) {
100 return Status::OK();
101 }
102
103 CHECK_FAIL_RETURN_UNEXPECTED(type.IsNumeric(), "Number of elements is not 0. The type should be numeric.");
104
105 int64_t byte_size = (*out)->SizeInBytes();
106
107 // Don't allocate if we have a tensor with no elements.
108 if (byte_size != 0) {
109 RETURN_IF_NOT_OK((*out)->AllocateBuffer(byte_size));
110 }
111 return Status::OK();
112 }
CreateFromMemory(const TensorShape & shape,const DataType & type,const uchar * src,TensorPtr * out)113 Status Tensor::CreateFromMemory(const TensorShape &shape, const DataType &type, const uchar *src, TensorPtr *out) {
114 RETURN_IF_NOT_OK(CreateEmpty(shape, type, out));
115 if (src != nullptr && out != nullptr) {
116 // Given the shape/type of this tensor, compute the data size and copy in the input bytes.
117 int64_t byte_size = (*out)->SizeInBytes();
118 if (byte_size == 0) {
119 return Status::OK();
120 }
121 if (byte_size < SECUREC_MEM_MAX_LEN) {
122 int ret_code = memcpy_s((*out)->data_, byte_size, src, byte_size);
123 CHECK_FAIL_RETURN_UNEXPECTED(ret_code == 0, "Failed to copy data into tensor.");
124 } else {
125 auto ret_code = std::memcpy((*out)->data_, src, byte_size);
126 CHECK_FAIL_RETURN_UNEXPECTED(ret_code == (*out)->data_, "Failed to copy data into tensor.");
127 }
128 }
129 return Status::OK();
130 }
131
CreateFromMemory(const TensorShape & shape,const DataType & type,const unsigned char * src,const dsize_t & length,TensorPtr * out)132 Status Tensor::CreateFromMemory(const TensorShape &shape, const DataType &type, const unsigned char *src,
133 const dsize_t &length, TensorPtr *out) {
134 RETURN_UNEXPECTED_IF_NULL(src);
135 RETURN_UNEXPECTED_IF_NULL(out);
136 const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
137 *out = std::allocate_shared<Tensor>(*alloc, shape, type);
138 CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory failed.");
139 if (type.IsNumeric()) {
140 dsize_t calculated_length = (*out)->SizeInBytes();
141 CHECK_FAIL_RETURN_UNEXPECTED(calculated_length == length, "Length of source data does not match the shape.");
142 } else {
143 // min_length is the length of a tensor with empty strings
144 // min_length = the number of bytes needed to store the offsets + 1 byte for each element
145 dsize_t min_length = (shape.NumOfElements() + 1) * kOffsetSize + shape.NumOfElements();
146 CHECK_FAIL_RETURN_UNEXPECTED(min_length <= length, "Length of source data does not match the shape.");
147 }
148
149 RETURN_IF_NOT_OK((*out)->AllocateBuffer(length));
150 if (length == 0) {
151 return Status::OK();
152 }
153 if (length < SECUREC_MEM_MAX_LEN) {
154 int ret_code = memcpy_s((*out)->data_, length, src, length);
155 CHECK_FAIL_RETURN_UNEXPECTED(ret_code == 0, "Failed to copy data into tensor.");
156 } else {
157 auto ret_code = std::memcpy((*out)->data_, src, length);
158 CHECK_FAIL_RETURN_UNEXPECTED(ret_code == (*out)->data_, "Failed to copy data into tensor.");
159 }
160
161 return Status::OK();
162 }
163
164 #ifdef ENABLE_PYTHON
CreateFromNpString(py::array arr,std::shared_ptr<Tensor> * out)165 Status Tensor::CreateFromNpString(py::array arr, std::shared_ptr<Tensor> *out) {
166 RETURN_UNEXPECTED_IF_NULL(out);
167 std::vector<dsize_t> shape;
168 for (dsize_t i = 0; i < arr.ndim(); i++) {
169 shape.push_back(static_cast<dsize_t>(arr.shape()[i]));
170 }
171 arr.resize({arr.size()}); // flatten the py::array so we can iterate once
172 std::vector<std::string> strings;
173
174 if (arr.dtype().kind() == 'U') {
175 (void)std::for_each(arr.begin(), arr.end(),
176 [&strings](const auto &s) { strings.emplace_back(py::cast<py::str>(s)); });
177 } else {
178 (void)std::for_each(arr.begin(), arr.end(),
179 [&strings](const auto &s) { strings.emplace_back(py::cast<py::bytes>(s)); });
180 }
181
182 arr.resize(shape); // resize arr back to the original shape
183
184 return CreateFromVector(strings, TensorShape{shape}, out);
185 }
186
CreateFromNpArray(const py::array & arr,std::shared_ptr<Tensor> * out)187 Status Tensor::CreateFromNpArray(const py::array &arr, std::shared_ptr<Tensor> *out) {
188 RETURN_UNEXPECTED_IF_NULL(out);
189 if (DataType::FromNpArray(arr) == DataType::DE_STRING) {
190 return CreateFromNpString(arr, out);
191 }
192
193 std::vector<dsize_t> shape;
194 std::vector<dsize_t> strides;
195 // check if strides are contiguous
196 bool is_strided = false;
197 dsize_t count = arr.size();
198 for (dsize_t i = 0; i < arr.ndim(); i++) {
199 shape.push_back(static_cast<dsize_t>(arr.shape()[i]));
200 strides.push_back(static_cast<dsize_t>(arr.strides()[i]));
201 // in case of empty array num_items=0
202 if (count != 0 && shape.size() > i && shape[i] != 0) {
203 count /= shape[i];
204 if (strides[i] != arr.itemsize() * count) {
205 is_strided = true;
206 }
207 }
208 }
209
210 unsigned char *data = static_cast<unsigned char *>(arr.request().ptr);
211
212 if (is_strided) {
213 RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape(shape), DataType::FromNpArray(arr), out));
214 RETURN_IF_NOT_OK(CopyStridedArray((*out)->data_, data, shape, strides, (*out)->type_.SizeInBytes()));
215 } else {
216 RETURN_IF_NOT_OK(Tensor::CreateFromMemory(TensorShape(shape), DataType::FromNpArray(arr), data, out));
217 }
218 return Status::OK();
219 }
220 #endif
221
222 #ifndef ENABLE_ANDROID
CreateFromByteList(const dataengine::BytesList & bytes_list,const TensorShape & shape,TensorPtr * out)223 Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const TensorShape &shape, TensorPtr *out) {
224 RETURN_UNEXPECTED_IF_NULL(out);
225 const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
226 *out = std::allocate_shared<Tensor>(*alloc, TensorShape({static_cast<dsize_t>(bytes_list.value_size())}),
227 DataType(DataType::DE_STRING));
228 CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory failed.");
229 // total bytes needed = offset array + strings
230 // offset array needs to store one offset var per element + 1 extra to get the length of the last string.
231 // strings will be null-terminated --> need 1 extra byte per element
232 dsize_t num_bytes = (kOffsetSize) * (*out)->shape_.NumOfElements() + kOffsetSize + bytes_list.ByteSizeLong();
233
234 (*out)->data_ = (*out)->data_allocator_->allocate(num_bytes);
235
236 auto offset_arr = reinterpret_cast<offset_t *>((*out)->data_);
237 uchar *buf = (*out)->GetStringsBuffer();
238
239 offset_t offset = buf - (*out)->data_; // the first string will start here
240 int32_t i = 0;
241 for (; i < bytes_list.value_size(); i++) {
242 const std::string &str = bytes_list.value(i);
243 // insert the start index of the string.
244 offset_arr[i] = offset;
245 // total bytes are reduced by kOffsetSize
246 num_bytes -= kOffsetSize;
247 // insert actual string
248 int ret_code = memcpy_s((*out)->data_ + offset, num_bytes, common::SafeCStr(str), str.length() + 1);
249 CHECK_FAIL_RETURN_UNEXPECTED(ret_code == 0, "Cannot copy string into Tensor");
250 // next string will be stored right after the current one.
251 offset = offset + str.length() + 1;
252 // total bytes are reduced by the length of the string
253 num_bytes -= str.length() + 1;
254 }
255 // store one more offset value so we can get the length of the last string
256 // length[last_element] = offset_arr[last_element + 1] - offset_arr[last_element]
257 offset_arr[i] = offset;
258
259 (*out)->data_end_ = (*out)->data_ + offset_arr[i];
260
261 MS_ASSERT(num_bytes == 0);
262 RETURN_IF_NOT_OK((*out)->Reshape(shape));
263 return Status::OK();
264 }
265 #endif
266
CreateFromFile(const std::string & path,std::shared_ptr<Tensor> * out)267 Status Tensor::CreateFromFile(const std::string &path, std::shared_ptr<Tensor> *out) {
268 RETURN_UNEXPECTED_IF_NULL(out);
269 Path file(path);
270 if (file.IsDirectory()) {
271 RETURN_STATUS_UNEXPECTED("Invalid file found: " + path + ", should be file, but got directory.");
272 }
273 std::ifstream fs;
274 fs.open(path, std::ios::binary | std::ios::in);
275 CHECK_FAIL_RETURN_UNEXPECTED(!fs.fail(), "Failed to open file: " + path);
276 int64_t num_bytes = fs.seekg(0, std::ios::end).tellg();
277 CHECK_FAIL_RETURN_UNEXPECTED(num_bytes < kDeMaxDim, "Invalid file to allocate tensor memory, check path: " + path);
278 CHECK_FAIL_RETURN_UNEXPECTED(fs.seekg(0, std::ios::beg).good(), "Failed to find size of file, check path: " + path);
279 RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape{num_bytes}, DataType(DataType::DE_UINT8), out));
280 int64_t written_bytes = fs.read(reinterpret_cast<char *>((*out)->GetMutableBuffer()), num_bytes).gcount();
281 if (!(written_bytes == num_bytes && fs.good())) {
282 fs.close();
283 RETURN_STATUS_UNEXPECTED("Error in writing to tensor, check path: " + path);
284 }
285 fs.close();
286 return Status::OK();
287 }
288
289 #ifndef ENABLE_ANDROID
CreateFromByteList(const dataengine::BytesList & bytes_list,const TensorShape & shape,const DataType & type,dsize_t pad_size,TensorPtr * out)290 Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const TensorShape &shape,
291 const DataType &type, dsize_t pad_size, TensorPtr *out) {
292 RETURN_UNEXPECTED_IF_NULL(out);
293 RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, type, out));
294
295 RETURN_UNEXPECTED_IF_NULL(out);
296 unsigned char *current_tensor_addr = (*out)->GetMutableBuffer();
297 int64_t tensor_bytes_remaining = bytes_list.value_size() * pad_size;
298
299 for (int i = 0; i < bytes_list.value_size(); i++) {
300 // read string data into tensor
301 const std::string ¤t_element = bytes_list.value(i);
302 int return_code =
303 memcpy_s(current_tensor_addr, tensor_bytes_remaining, common::SafeCStr(current_element), current_element.size());
304
305 CHECK_FAIL_RETURN_UNEXPECTED(return_code == 0, "memcpy_s failed when reading bytesList element into Tensor");
306
307 current_tensor_addr += current_element.size();
308 tensor_bytes_remaining -= current_element.size();
309
310 // pad
311 int64_t chars_to_pad = pad_size - current_element.size();
312 return_code = memset_s(current_tensor_addr, tensor_bytes_remaining, static_cast<int>(' '), chars_to_pad);
313 CHECK_FAIL_RETURN_UNEXPECTED(return_code == 0, "memcpy_s failed when padding Tensor");
314
315 current_tensor_addr += chars_to_pad;
316 tensor_bytes_remaining -= chars_to_pad;
317 }
318
319 return Status::OK();
320 }
321 #endif
322
323 // Memcpy the given strided array's used part to consecutive memory
324 // Consider a 3-d array
325 // A[(i * shape[1] + j) * shape[2] + k] = B[i][j][k] = C[i * strides[0] + j * strides[1] + k * strides[2]]
326 // Here we convert array C to array A, by memcpy index by index (Note that not all elements in C is copied)
CopyStridedArray(unsigned char * dst,unsigned char * src,std::vector<dsize_t> shape,std::vector<dsize_t> strides,uint8_t type_size)327 Status Tensor::CopyStridedArray(unsigned char *dst, unsigned char *src, std::vector<dsize_t> shape,
328 std::vector<dsize_t> strides, uint8_t type_size) {
329 RETURN_UNEXPECTED_IF_NULL(dst);
330 RETURN_UNEXPECTED_IF_NULL(src);
331 dsize_t size = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<>());
332 for (dsize_t i = 0; i < size; ++i) {
333 dsize_t offset = 0;
334 dsize_t count = i;
335 for (size_t j = 0; j < shape.size(); ++j) {
336 // convert 1d array's index to 3d array's index (A -> B)
337 CHECK_FAIL_RETURN_UNEXPECTED(shape[shape.size() - 1 - j] != 0, "Invalid data, shape can't be zero.");
338 dsize_t idx = count % shape[shape.size() - 1 - j];
339 count /= shape[shape.size() - 1 - j];
340 // calculate the raw data offset based on strides (B -> C)
341 offset += idx * strides[shape.size() - 1 - j];
342 // once count = 0, the following idxes are all zero, skip them
343 if (count == 0) {
344 break;
345 }
346 }
347 // strides already consider byte size of the data type, but dst doesn't.
348 // dst[i] = dst + i * type_size = src + offset
349 int ret_code = memcpy_s(dst + i * type_size, type_size, src + offset, type_size);
350 if (ret_code != 0) {
351 RETURN_STATUS_UNEXPECTED("Failed to copy data into Tensor.");
352 }
353 }
354 return Status::OK();
355 }
356
357 // Name: Destructor
358 // Description: Destructor
~Tensor()359 Tensor::~Tensor() {
360 if (data_ != nullptr) {
361 if (data_allocator_ != nullptr) {
362 data_allocator_->deallocate(data_);
363 data_ = nullptr;
364 data_end_ = nullptr;
365 } else {
366 // If we didn't have an allocator, but data_ is not null then it must
367 // be a stand-alone tensor that used malloc directly.
368 free(data_);
369 data_ = nullptr;
370 data_end_ = nullptr;
371 }
372 }
373 }
374
operator ==(const Tensor & rhs) const375 bool Tensor::operator==(const Tensor &rhs) const {
376 // 1. different shape 2. different type 3. one data_ is nullptr and the other is not
377 if (shape_ != rhs.shape() || type_ != rhs.type_ || (data_ == nullptr && rhs.data_ != nullptr) ||
378 (data_ != nullptr && rhs.data_ == nullptr)) {
379 return false;
380 }
381 if (data_ == nullptr && rhs.data_ == nullptr) {
382 return true;
383 }
384 // use mem compare to compare the two data, size are already verified
385 return memcmp(data_, rhs.data_, SizeInBytes()) == 0;
386 }
387
388 // Name: PrintItemAt()
389 // Description: A function that print the value as specified by its index
PrintItemAt(const std::vector<dsize_t> & index,std::ostream & out) const390 void Tensor::PrintItemAt(const std::vector<dsize_t> &index, std::ostream &out) const {
391 Status rc;
392 MS_ASSERT(data_);
393
394 switch (type_.value()) {
395 CASE_PRINT_HEX(DataType::DE_BOOL, bool)
396
397 CASE_PRINT_HEX(DataType::DE_INT8, int8_t)
398
399 CASE_PRINT_HEX(DataType::DE_UINT8, uint8_t)
400
401 CASE_PRINT(DataType::DE_INT16, int16_t)
402
403 CASE_PRINT(DataType::DE_UINT16, uint16_t)
404
405 CASE_PRINT(DataType::DE_INT32, int32_t)
406
407 CASE_PRINT(DataType::DE_UINT32, uint32_t)
408
409 CASE_PRINT(DataType::DE_INT64, int64_t)
410
411 CASE_PRINT(DataType::DE_UINT64, uint64_t)
412
413 CASE_PRINT(DataType::DE_FLOAT16, float16)
414
415 CASE_PRINT(DataType::DE_FLOAT32, float)
416
417 CASE_PRINT(DataType::DE_FLOAT64, double)
418
419 case DataType::DE_STRING: {
420 std::string_view o{""};
421 rc = GetItemAt(&o, index);
422 out << "\"" << o << "\"";
423 break;
424 }
425 default: {
426 out << "?";
427 break;
428 }
429 }
430 if (rc.IsError()) {
431 out << rc.ToString();
432 }
433 }
434
435 // Name: PrintRecursive()
436 // Description: A function that prints Tensor recursively, first called by print
PrintRecursive(std::ostream & out,int32_t cur_dim,const std::vector<dsize_t> & cur_index) const437 void Tensor::PrintRecursive(std::ostream &out, int32_t cur_dim, const std::vector<dsize_t> &cur_index) const {
438 if (cur_index.size() == shape_.Rank()) {
439 PrintItemAt(cur_index, out);
440 } else {
441 out << "[";
442 for (dsize_t i = 0; i < shape_[cur_dim]; i++) {
443 std::vector<dsize_t> new_index = cur_index;
444 new_index.push_back(i);
445 PrintRecursive(out, cur_dim + 1, new_index);
446 if (i < shape_[cur_dim] - 1) {
447 out << ",";
448 }
449 }
450 out << "]";
451 }
452 }
453
454 // Name: Print()
455 // Description: A function that prints info about the tensor
Print(std::ostream & out) const456 void Tensor::Print(std::ostream &out) const {
457 out << "Tensor (shape: ";
458 out << shape_;
459 out << ", Type: " << type_ << ")\n";
460 if (data_) {
461 PrintRecursive(out, 0, std::vector<dsize_t>{});
462 } else {
463 out << "[Data area is null]";
464 }
465 }
466
PrintData(std::ostream & out) const467 void Tensor::PrintData(std::ostream &out) const {
468 if (data_) {
469 PrintRecursive(out, 0, std::vector<dsize_t>{});
470 }
471 }
472
AllocateBuffer(const dsize_t & length)473 Status Tensor::AllocateBuffer(const dsize_t &length) {
474 RETURN_UNEXPECTED_IF_NULL(data_allocator_);
475 if (data_ == nullptr) {
476 data_ = data_allocator_->allocate(length);
477 CHECK_FAIL_RETURN_UNEXPECTED(data_ != nullptr, "Failed to allocate memory for tensor.");
478 data_end_ = data_ + length;
479 }
480 return Status::OK();
481 }
482
Reshape(const TensorShape & shape)483 Status Tensor::Reshape(const TensorShape &shape) {
484 if (shape.NumOfElements() == shape_.NumOfElements()) {
485 shape_ = shape;
486 return Status::OK();
487 } else {
488 std::string err = "Cannot reshape, Number of elements do not match";
489 RETURN_STATUS_UNEXPECTED(err);
490 }
491 }
492
Invalidate()493 void Tensor::Invalidate() {
494 shape_ = TensorShape::CreateUnknownRankShape();
495 type_ = DataType(DataType::DE_UNKNOWN);
496 data_ = nullptr;
497 data_end_ = nullptr;
498 data_allocator_ = nullptr;
499 }
500
501 template <typename T>
GetItemPtr(T ** ptr,const std::vector<dsize_t> & index) const502 Status Tensor::GetItemPtr(T **ptr, const std::vector<dsize_t> &index) const {
503 RETURN_UNEXPECTED_IF_NULL(ptr);
504 if (type_.IsCompatible<T>()) {
505 if (data_ == nullptr) {
506 std::string err = "Data is not allocated yet";
507 RETURN_STATUS_UNEXPECTED(err);
508 }
509 dsize_t flat_idx;
510 RETURN_IF_NOT_OK(shape_.ToFlatIndex(index, &flat_idx));
511 *ptr = reinterpret_cast<T *>(data_ + flat_idx * type_.SizeInBytes());
512 RETURN_UNEXPECTED_IF_NULL(ptr);
513
514 return Status::OK();
515 } else {
516 std::string err = "data type not compatible";
517 RETURN_STATUS_UNEXPECTED(err);
518 }
519 }
520
GetItemPtr(uchar ** ptr,const std::vector<dsize_t> & index,offset_t * length) const521 Status Tensor::GetItemPtr(uchar **ptr, const std::vector<dsize_t> &index, offset_t *length) const {
522 RETURN_UNEXPECTED_IF_NULL(ptr);
523 RETURN_UNEXPECTED_IF_NULL(length);
524 if (type_ == DataType::DE_STRING) {
525 if (data_ == nullptr) {
526 std::string err = "Data is not allocated yet";
527 RETURN_STATUS_UNEXPECTED(err);
528 }
529 dsize_t flat_idx;
530 RETURN_IF_NOT_OK(shape_.ToFlatIndex(index, &flat_idx));
531 offset_t length_temp = 0;
532 RETURN_IF_NOT_OK(GetStringAt(flat_idx, ptr, &length_temp));
533 *length = length_temp;
534 return Status::OK();
535 } else {
536 std::string err = "data type not compatible";
537 RETURN_STATUS_UNEXPECTED(err);
538 }
539 }
540
StartAddrOfIndex(std::vector<dsize_t> ind,uchar ** start_addr_of_index,TensorShape * remaining)541 Status Tensor::StartAddrOfIndex(std::vector<dsize_t> ind, uchar **start_addr_of_index, TensorShape *remaining) {
542 RETURN_UNEXPECTED_IF_NULL(start_addr_of_index);
543 RETURN_UNEXPECTED_IF_NULL(remaining);
544 if (type() == DataType::DE_STRING) {
545 RETURN_STATUS_UNEXPECTED("StartAddrOfIndex does not support string tensors yet.");
546 }
547
548 dsize_t flat_ind;
549 std::vector<dsize_t> t_shape = shape().AsVector();
550 std::vector<dsize_t> r(t_shape.begin() + ind.size(), t_shape.end());
551 *remaining = TensorShape(r);
552 ind.resize(this->Rank(), 0); // same as -> while (ind.size() < this->Rank()) ind.push_back(0);
553
554 RETURN_IF_NOT_OK(shape_.ToFlatIndex(ind, &flat_ind));
555 // check if GetBuffer() returns null, we should flag this as an error, this sanity check will only
556 // be true is the tensor failed to allocate memory.
557 if (GetMutableBuffer() == nullptr) {
558 RETURN_STATUS_UNEXPECTED("Invalid GetBuffer in Tensor, got nullptr");
559 }
560 *start_addr_of_index = GetMutableBuffer() + flat_ind * this->type().SizeInBytes();
561 return Status::OK();
562 }
563
InsertTensor(const std::vector<dsize_t> & ind,const std::shared_ptr<Tensor> & tensor,const bool partial_insert)564 Status Tensor::InsertTensor(const std::vector<dsize_t> &ind, const std::shared_ptr<Tensor> &tensor,
565 const bool partial_insert) {
566 RETURN_UNEXPECTED_IF_NULL(tensor);
567 std::string err_msg;
568 if (partial_insert) {
569 err_msg += (ind.size() != 1)
570 ? "[Tensor] only supports 1D insertion of elements not along the full length of the axis\n"
571 : "";
572 err_msg +=
573 (ind.at(0) + tensor->shape().NumOfElements() > shape().NumOfElements()) ? "[Tensor] incorrect index\n" : "";
574 } else {
575 err_msg += (ind.size() + tensor->Rank() != Rank()) ? "[Tensor] incorrect index\n" : "";
576 }
577 err_msg += (type() == DataType::DE_STRING) ? "[Tensor] Cannot insert into a tensor of type string\n" : "";
578 err_msg += (!shape().known() || !tensor->shape().known()) ? "[Tensor] unknown shape\n" : "";
579
580 err_msg += tensor->type().SizeInBytes() != type().SizeInBytes() ? "[Tensor] incorrect datatype\n" : "";
581 uchar *start_addr_of_ind = nullptr;
582 if (partial_insert) {
583 TensorShape remaining_shape = tensor->shape();
584 err_msg +=
585 (!StartAddrOfIndex(ind, &start_addr_of_ind, &remaining_shape).IsOk()) ? "[Tensor] incorrect index\n" : "";
586 } else {
587 TensorShape remaining_shape = TensorShape::CreateUnknownRankShape();
588 err_msg +=
589 (!StartAddrOfIndex(ind, &start_addr_of_ind, &remaining_shape).IsOk()) ? "[Tensor] incorrect index\n" : "";
590 err_msg += !(remaining_shape == tensor->shape()) ? "[Tensor] memory error\n" : "";
591 }
592
593 if (!err_msg.empty()) {
594 MS_LOG(DEBUG) << "Insert tensor message: " << err_msg;
595 RETURN_STATUS_UNEXPECTED(err_msg);
596 } else {
597 if (start_addr_of_ind != nullptr) {
598 int ret_code =
599 memcpy_s(start_addr_of_ind, tensor->SizeInBytes(), tensor->GetMutableBuffer(), tensor->SizeInBytes());
600 if (ret_code == 0) {
601 return Status::OK();
602 } else {
603 err_msg += "[Tensor] error in memcpy_s when inserting tensor\n";
604 MS_LOG(DEBUG) << "Tensor message: " << err_msg;
605 RETURN_STATUS_UNEXPECTED(err_msg);
606 }
607 } else {
608 RETURN_STATUS_UNEXPECTED("Failed to create memory for Tensor.");
609 }
610 }
611 }
612
ExpandDim(const dsize_t & axis)613 Status Tensor::ExpandDim(const dsize_t &axis) {
614 if (axis > Rank()) {
615 std::string err = "Axis is out of bound";
616 RETURN_STATUS_UNEXPECTED(err);
617 }
618 if (axis == Rank()) {
619 shape_ = shape_.AppendDim(1);
620 } else {
621 shape_ = shape_.InsertDim(axis, 1);
622 }
623 return Status::OK();
624 }
625
Strides() const626 std::vector<dsize_t> Tensor::Strides() const {
627 std::vector<dsize_t> strides = shape_.Strides();
628 uint8_t size = type_.SizeInBytes();
629 (void)std::transform(strides.begin(), strides.end(), strides.begin(), [&size](const auto &c) { return c * size; });
630 return strides;
631 }
632
633 #ifdef ENABLE_PYTHON
GetBufferInfo(Tensor * t,py::buffer_info * out)634 Status Tensor::GetBufferInfo(Tensor *t, py::buffer_info *out) {
635 RETURN_UNEXPECTED_IF_NULL(t);
636 RETURN_UNEXPECTED_IF_NULL(out);
637 CHECK_FAIL_RETURN_UNEXPECTED(t->type().IsNumeric(), "Cannot use GetBufferInfo on tensor of strings.");
638
639 std::string format_desc = t->type().GetPybindFormat();
640 if (format_desc.empty()) {
641 RETURN_STATUS_UNEXPECTED("Cannot convert DE type tp pybind format");
642 }
643 *out = py::buffer_info(t->GetMutableBuffer(), /* Pointer to buffer */
644 t->type().SizeInBytes(), /* Size of one scalar */
645 format_desc, /* Python struct-style format descriptor */
646 t->Rank(), /* Number of dimensions */
647 t->shape().AsVector(), /* Buffer dimensions */
648 t->Strides());
649 RETURN_UNEXPECTED_IF_NULL(out);
650 return Status::OK();
651 }
652 #endif
653
to_json(nlohmann::json * out_json)654 Status Tensor::to_json(nlohmann::json *out_json) {
655 nlohmann::json args;
656 args["shape"] = shape_.AsVector();
657 args["type"] = type_.ToString();
658 if (type_ == DataType::DE_BOOL) {
659 RETURN_IF_NOT_OK(to_json_convert<bool>(&args));
660 } else if (type_ == DataType::DE_INT8) {
661 RETURN_IF_NOT_OK(to_json_convert<int8_t>(&args));
662 } else if (type_ == DataType::DE_INT16) {
663 RETURN_IF_NOT_OK(to_json_convert<int16_t>(&args));
664 } else if (type_ == DataType::DE_INT32) {
665 RETURN_IF_NOT_OK(to_json_convert<int32_t>(&args));
666 } else if (type_ == DataType::DE_INT64) {
667 RETURN_IF_NOT_OK(to_json_convert<int64_t>(&args));
668 } else if (type_ == DataType::DE_UINT8) {
669 RETURN_IF_NOT_OK(to_json_convert<uint8_t>(&args));
670 } else if (type_ == DataType::DE_UINT16) {
671 RETURN_IF_NOT_OK(to_json_convert<uint16_t>(&args));
672 } else if (type_ == DataType::DE_UINT32) {
673 RETURN_IF_NOT_OK(to_json_convert<uint32_t>(&args));
674 } else if (type_ == DataType::DE_UINT64) {
675 RETURN_IF_NOT_OK(to_json_convert<uint64_t>(&args));
676 } else if (type_ == DataType::DE_FLOAT32) {
677 RETURN_IF_NOT_OK(to_json_convert<float>(&args));
678 } else if (type_ == DataType::DE_FLOAT64) {
679 RETURN_IF_NOT_OK(to_json_convert<double>(&args));
680 } else if (type_ == DataType::DE_STRING) {
681 std::vector<std::string> data_out;
682 for (auto it = this->begin<std::string_view>(); it != this->end<std::string_view>(); it++) {
683 data_out.emplace_back(*it);
684 }
685 args["data"] = data_out;
686 } else {
687 return Status(StatusCode::kMDUnexpectedError, "Type is not supported for tensor");
688 }
689 *out_json = args;
690 return Status::OK();
691 }
692
693 template <typename T>
to_json_convert(nlohmann::json * args)694 Status Tensor::to_json_convert(nlohmann::json *args) {
695 std::vector<T> data_out;
696 for (auto it = this->begin<T>(); it != this->end<T>(); it++) {
697 data_out.emplace_back(*it);
698 }
699 (*args)["data"] = data_out;
700 return Status::OK();
701 }
702
from_json(nlohmann::json op_params,std::shared_ptr<Tensor> * tensor)703 Status Tensor::from_json(nlohmann::json op_params, std::shared_ptr<Tensor> *tensor) {
704 CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("shape") != op_params.end(), "Failed to find shape");
705 CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("type") != op_params.end(), "Failed to find type");
706 CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("data") != op_params.end(), "Failed to find data");
707 std::string type = op_params["type"];
708 std::vector<dsize_t> list = op_params["shape"];
709 TensorShape shape = TensorShape(list);
710 if (type == "bool") {
711 RETURN_IF_NOT_OK(from_json_convert<bool>(op_params["data"], shape, tensor));
712 } else if (type == "int8") {
713 RETURN_IF_NOT_OK(from_json_convert<int8_t>(op_params["data"], shape, tensor));
714 } else if (type == "int16") {
715 RETURN_IF_NOT_OK(from_json_convert<int16_t>(op_params["data"], shape, tensor));
716 } else if (type == "int32") {
717 RETURN_IF_NOT_OK(from_json_convert<int32_t>(op_params["data"], shape, tensor));
718 } else if (type == "int64") {
719 RETURN_IF_NOT_OK(from_json_convert<int64_t>(op_params["data"], shape, tensor));
720 } else if (type == "uint8") {
721 RETURN_IF_NOT_OK(from_json_convert<uint8_t>(op_params["data"], shape, tensor));
722 } else if (type == "uint16") {
723 RETURN_IF_NOT_OK(from_json_convert<uint16_t>(op_params["data"], shape, tensor));
724 } else if (type == "uint32") {
725 RETURN_IF_NOT_OK(from_json_convert<uint32_t>(op_params["data"], shape, tensor));
726 } else if (type == "uint64") {
727 RETURN_IF_NOT_OK(from_json_convert<uint64_t>(op_params["data"], shape, tensor));
728 } else if (type == "float32") {
729 RETURN_IF_NOT_OK(from_json_convert<float>(op_params["data"], shape, tensor));
730 } else if (type == "float64") {
731 RETURN_IF_NOT_OK(from_json_convert<double>(op_params["data"], shape, tensor));
732 } else if (type == "string") {
733 RETURN_IF_NOT_OK(from_json_convert<std::string>(op_params["data"], shape, tensor));
734 } else {
735 return Status(StatusCode::kMDUnexpectedError, "Type is not supported for tensor");
736 }
737 return Status::OK();
738 }
739
740 template <typename T>
from_json_convert(nlohmann::json json_data,TensorShape shape,std::shared_ptr<Tensor> * tensor)741 Status Tensor::from_json_convert(nlohmann::json json_data, TensorShape shape, std::shared_ptr<Tensor> *tensor) {
742 std::vector<T> data = json_data;
743 RETURN_IF_NOT_OK(CreateFromVector(data, shape, tensor));
744 return Status::OK();
745 }
746
747 template <typename T>
GetItemAt(T * o,const std::vector<dsize_t> & index) const748 Status Tensor::GetItemAt(T *o, const std::vector<dsize_t> &index) const {
749 RETURN_UNEXPECTED_IF_NULL(o);
750 if (data_ == nullptr) {
751 RETURN_STATUS_UNEXPECTED("Data is not allocated yet");
752 }
753 if (!type_.IsLooselyCompatible<T>()) {
754 std::string err = "Template type and Tensor type are not compatible";
755 RETURN_STATUS_UNEXPECTED(err);
756 }
757 if (type_.IsUnsignedInt()) {
758 RETURN_IF_NOT_OK(GetUnsignedIntAt<T>(o, index));
759 } else if (type_.IsSignedInt()) {
760 RETURN_IF_NOT_OK(GetSignedIntAt<T>(o, index));
761 } else if (type_.IsFloat()) {
762 RETURN_IF_NOT_OK(GetFloatAt<T>(o, index));
763 } else if (type_.IsBool()) {
764 bool *ptr = nullptr;
765 RETURN_IF_NOT_OK(GetItemPtr<bool>(&ptr, index));
766 *o = static_cast<T>(*ptr);
767 } else {
768 std::string err = "Tensor Type is unknown";
769 RETURN_STATUS_UNEXPECTED(err);
770 }
771 return Status::OK();
772 }
773
GetItemAt(std::string_view * o,const std::vector<dsize_t> & index) const774 Status Tensor::GetItemAt(std::string_view *o, const std::vector<dsize_t> &index) const {
775 RETURN_UNEXPECTED_IF_NULL(data_);
776 RETURN_UNEXPECTED_IF_NULL(o);
777 CHECK_FAIL_RETURN_UNEXPECTED(type_ == DataType::DE_STRING, "Tensor type is not a string");
778
779 uchar *start = nullptr;
780 offset_t length = 0;
781 RETURN_IF_NOT_OK(GetItemPtr(&start, index, &length));
782 std::string_view sv{reinterpret_cast<const char *>(start)};
783 o->swap(sv);
784 return Status::OK();
785 }
786
787 #ifdef ENABLE_PYTHON
788 // return data as numpy, should return status
GetDataAsNumpy(py::array * data)789 Status Tensor::GetDataAsNumpy(py::array *data) {
790 RETURN_UNEXPECTED_IF_NULL(data);
791 if (type_ == DataType::DE_BOOL) {
792 *data = py::array_t<bool>(shape_.AsVector(), reinterpret_cast<bool *>(data_));
793 } else if (type_ == DataType::DE_INT8) {
794 *data = py::array_t<int8_t>(shape_.AsVector(), reinterpret_cast<int8_t *>(data_));
795 } else if (type_ == DataType::DE_INT16) {
796 *data = py::array_t<int16_t>(shape_.AsVector(), reinterpret_cast<int16_t *>(data_));
797 } else if (type_ == DataType::DE_INT32) {
798 *data = py::array_t<int32_t>(shape_.AsVector(), reinterpret_cast<int32_t *>(data_));
799 } else if (type_ == DataType::DE_INT64) {
800 *data = py::array_t<int64_t>(shape_.AsVector(), reinterpret_cast<int64_t *>(data_));
801 } else if (type_ == DataType::DE_UINT8) {
802 *data = py::array_t<uint8_t>(shape_.AsVector(), reinterpret_cast<uint8_t *>(data_));
803 } else if (type_ == DataType::DE_UINT16) {
804 *data = py::array_t<uint16_t>(shape_.AsVector(), reinterpret_cast<uint16_t *>(data_));
805 } else if (type_ == DataType::DE_UINT32) {
806 *data = py::array_t<uint32_t>(shape_.AsVector(), reinterpret_cast<uint32_t *>(data_));
807 } else if (type_ == DataType::DE_UINT64) {
808 *data = py::array_t<uint64_t>(shape_.AsVector(), reinterpret_cast<uint64_t *>(data_));
809 } else if (type_ == DataType::DE_FLOAT16) {
810 *data = py::array_t<float16>(shape_.AsVector(), reinterpret_cast<float16 *>(data_));
811 } else if (type_ == DataType::DE_FLOAT32) {
812 *data = py::array_t<float>(shape_.AsVector(), reinterpret_cast<float *>(data_));
813 } else if (type_ == DataType::DE_FLOAT64) {
814 *data = py::array_t<double>(shape_.AsVector(), reinterpret_cast<double *>(data_));
815 } else if (type_ == DataType::DE_STRING) {
816 RETURN_IF_NOT_OK(GetDataAsNumpyStrings(data));
817 } else {
818 RETURN_STATUS_UNEXPECTED("Got unexpected type when returning numpy");
819 }
820 return Status::OK();
821 }
GetDataAsNumpyStrings(py::array * data)822 Status Tensor::GetDataAsNumpyStrings(py::array *data) {
823 RETURN_UNEXPECTED_IF_NULL(data);
824 auto itr = begin<std::string_view>();
825 uint64_t max_value = 0;
826 for (; itr != end<std::string_view>(); ++itr) {
827 #if defined(__APPLE__)
828 max_value = fmax((*itr).length(), max_value);
829 #else
830 max_value = std::max((*itr).length(), max_value);
831 #endif
832 }
833 // if all strings are empty, numpy stores a byte for each string |S1
834 max_value = (max_value == 0 ? 1 : max_value);
835 uint64_t total_size = shape_.NumOfElements() * max_value;
836 char *tmp_data = reinterpret_cast<char *>(data_allocator_->allocate(total_size));
837 if (tmp_data == nullptr) {
838 RETURN_STATUS_UNEXPECTED("Cannot create temp array.");
839 }
840 int ret_code = memset_s(tmp_data, total_size, 0, total_size);
841 CHECK_FAIL_RETURN_UNEXPECTED(ret_code == 0, "Failed to initialize temp memory");
842
843 itr = begin<std::string_view>();
844 uint64_t i = 0;
845 for (; itr != end<std::string_view>(); itr++, i++) {
846 if (!(*itr).empty()) {
847 ret_code = memcpy_s(tmp_data + i * max_value, total_size, (*itr).data(), (*itr).length());
848 CHECK_FAIL_RETURN_UNEXPECTED(ret_code == 0, "Failed to copy string data.");
849 }
850 }
851 auto strides = shape_.Strides();
852 (void)std::transform(strides.begin(), strides.end(), strides.begin(),
853 [&max_value](const auto &s) { return s * max_value; });
854 *data = py::array(py::dtype("S" + std::to_string(max_value)), shape_.AsVector(), strides, tmp_data);
855 RETURN_UNEXPECTED_IF_NULL(data);
856 data_allocator_->deallocate(reinterpret_cast<uchar *>(tmp_data));
857 return Status::OK();
858 }
859 #endif
860
// Remove all degenerate (size-1) dimensions from this tensor's shape in place.
// Only shape metadata changes; the underlying data buffer is untouched.
void Tensor::Squeeze() { shape_ = shape_.Squeeze(); }
862
863 template <typename T>
GetUnsignedIntAt(T * o,const std::vector<dsize_t> & index) const864 Status Tensor::GetUnsignedIntAt(T *o, const std::vector<dsize_t> &index) const {
865 RETURN_UNEXPECTED_IF_NULL(o);
866 if (data_ == nullptr) {
867 RETURN_STATUS_UNEXPECTED("Data is not allocated yet");
868 }
869 if (!type_.IsLooselyCompatible<T>()) {
870 std::string err = "Template type and Tensor type are not compatible";
871 RETURN_STATUS_UNEXPECTED(err);
872 }
873 switch (type_.value()) {
874 case DataType::DE_UINT8: {
875 uint8_t *ptr = nullptr;
876 RETURN_IF_NOT_OK(GetItemPtr<uint8_t>(&ptr, index));
877 *o = static_cast<T>(*ptr);
878 break;
879 }
880 case DataType::DE_UINT16: {
881 uint16_t *ptr = nullptr;
882 RETURN_IF_NOT_OK(GetItemPtr<uint16_t>(&ptr, index));
883 *o = static_cast<T>(*ptr);
884 break;
885 }
886 case DataType::DE_UINT32: {
887 uint32_t *ptr = nullptr;
888 RETURN_IF_NOT_OK(GetItemPtr<uint32_t>(&ptr, index));
889 *o = static_cast<T>(*ptr);
890 break;
891 }
892 case DataType::DE_UINT64: {
893 uint64_t *ptr = nullptr;
894 RETURN_IF_NOT_OK(GetItemPtr<uint64_t>(&ptr, index));
895 *o = static_cast<T>(*ptr);
896 break;
897 }
898 default:
899 std::string err = "Tensor Type is not an unsigned Integer";
900 RETURN_STATUS_UNEXPECTED(err);
901 }
902 return Status::OK();
903 }
904
905 template <typename T>
GetSignedIntAt(T * o,const std::vector<dsize_t> & index) const906 Status Tensor::GetSignedIntAt(T *o, const std::vector<dsize_t> &index) const {
907 RETURN_UNEXPECTED_IF_NULL(o);
908 if (data_ == nullptr) {
909 RETURN_STATUS_UNEXPECTED("Data is not allocated yet");
910 }
911 if (!type_.IsLooselyCompatible<T>()) {
912 std::string err = "Template type and Tensor type are not compatible";
913 RETURN_STATUS_UNEXPECTED(err);
914 }
915 switch (type_.value()) {
916 case DataType::DE_INT8: {
917 int8_t *ptr = nullptr;
918 RETURN_IF_NOT_OK(GetItemPtr<int8_t>(&ptr, index));
919 *o = static_cast<T>(*ptr);
920 break;
921 }
922 case DataType::DE_INT16: {
923 int16_t *ptr = nullptr;
924 RETURN_IF_NOT_OK(GetItemPtr<int16_t>(&ptr, index));
925 *o = static_cast<T>(*ptr);
926 break;
927 }
928 case DataType::DE_INT32: {
929 int32_t *ptr = nullptr;
930 RETURN_IF_NOT_OK(GetItemPtr<int32_t>(&ptr, index));
931 *o = static_cast<T>(*ptr);
932 break;
933 }
934 case DataType::DE_INT64: {
935 int64_t *ptr = nullptr;
936 RETURN_IF_NOT_OK(GetItemPtr<int64_t>(&ptr, index));
937 *o = static_cast<T>(*ptr);
938 break;
939 }
940 default:
941 std::string err = "Tensor Type is not a signed Integer";
942 RETURN_STATUS_UNEXPECTED(err);
943 }
944 return Status::OK();
945 }
946
947 template <typename T>
GetFloatAt(T * o,const std::vector<dsize_t> & index) const948 Status Tensor::GetFloatAt(T *o, const std::vector<dsize_t> &index) const {
949 RETURN_UNEXPECTED_IF_NULL(o);
950 if (data_ == nullptr) {
951 RETURN_STATUS_UNEXPECTED("Data is not allocated yet");
952 }
953 if (!type_.IsLooselyCompatible<T>()) {
954 std::string err = "Template type and Tensor type are not compatible";
955 RETURN_STATUS_UNEXPECTED(err);
956 }
957 switch (type_.value()) {
958 case DataType::DE_FLOAT16: {
959 float16 *ptr = nullptr;
960 RETURN_IF_NOT_OK(GetItemPtr<float16>(&ptr, index));
961 *o = static_cast<T>(*ptr);
962 break;
963 }
964 case DataType::DE_FLOAT32: {
965 float *ptr = nullptr;
966 RETURN_IF_NOT_OK(GetItemPtr<float>(&ptr, index));
967 *o = static_cast<T>(*ptr);
968 break;
969 }
970 case DataType::DE_FLOAT64: {
971 double *ptr = nullptr;
972 RETURN_IF_NOT_OK(GetItemPtr<double>(&ptr, index));
973 *o = static_cast<T>(*ptr);
974 break;
975 }
976 default:
977 std::string err = "Tensor Type is not a float/double";
978 RETURN_STATUS_UNEXPECTED(err);
979 }
980 return Status::OK();
981 }
GetStringAt(dsize_t index,uchar ** string_start,offset_t * length) const982 Status Tensor::GetStringAt(dsize_t index, uchar **string_start, offset_t *length) const {
983 CHECK_FAIL_RETURN_UNEXPECTED(type_ == DataType::DE_STRING, "Type is not string");
984 RETURN_UNEXPECTED_IF_NULL(data_);
985 RETURN_UNEXPECTED_IF_NULL(string_start);
986 RETURN_UNEXPECTED_IF_NULL(length);
987 auto *offset_ptr = reinterpret_cast<offset_t *>(data_); // offsets starts here
988 offset_t start = offset_ptr[index];
989 *string_start = data_ + start;
990 *length = offset_ptr[index + 1] - start - 1; // -1 to skip the \0 from the string length
991 return Status::OK();
992 }
CopyLastDimAt(const std::shared_ptr<Tensor> & src,const std::vector<dsize_t> & index)993 Status Tensor::CopyLastDimAt(const std::shared_ptr<Tensor> &src, const std::vector<dsize_t> &index) {
994 RETURN_UNEXPECTED_IF_NULL(src);
995 CHECK_FAIL_RETURN_UNEXPECTED(src->type() == type_, "Source Tensor has a different type");
996 CHECK_FAIL_RETURN_UNEXPECTED(index.back() == 0, "Last dim in index should be 0");
997
998 uint8_t type_size = type_.SizeInBytes();
999 size_t len = std::min(src->shape()[-1], shape_[-1]) * type_size;
1000 dsize_t src_flat_ind = 0, dst_flat_ind = 0;
1001 RETURN_IF_NOT_OK(src->shape().ToFlatIndex(index, &src_flat_ind));
1002 RETURN_IF_NOT_OK(shape_.ToFlatIndex(index, &dst_flat_ind));
1003
1004 const unsigned char *src_addr = src->GetBuffer() + src_flat_ind * type_size;
1005 unsigned char *dst_addr = GetMutableBuffer() + dst_flat_ind * type_size;
1006 CHECK_FAIL_RETURN_UNEXPECTED(memcpy_s(dst_addr, len, src_addr, len) == 0, "memcpy error");
1007 return Status::OK();
1008 }
1009
GetSliceOption(const SliceOption & slice_option,const int32_t & slice_index,SliceOption * slice_option_ptr)1010 Status Tensor::GetSliceOption(const SliceOption &slice_option, const int32_t &slice_index,
1011 SliceOption *slice_option_ptr) {
1012 RETURN_UNEXPECTED_IF_NULL(slice_option_ptr);
1013 if (slice_option.indices_.empty() && !slice_option.slice_.valid()) {
1014 RETURN_STATUS_UNEXPECTED("Both indices and slices can not be empty.");
1015 }
1016
1017 if (!slice_option.indices_.empty() && slice_option.slice_.valid()) {
1018 RETURN_STATUS_UNEXPECTED("Both indices and slices can not be given.");
1019 }
1020
1021 CHECK_FAIL_RETURN_UNEXPECTED(shape_.Size() > slice_index, "Invalid shape, should greater than slices index.");
1022 // if slice object was provided, indices should be empty. Generate indices from the slice object.
1023 if (slice_option.indices_.empty()) {
1024 // check if slice is valid
1025 mindspore::dataset::Slice slice_copy = slice_option.slice_;
1026 slice_copy.start_ = HandleNeg(slice_option.slice_.start_, shape_[slice_index]);
1027 slice_copy.stop_ = HandleNeg(slice_option.slice_.stop_, shape_[slice_index]);
1028 slice_copy.start_ = slice_copy.start_ < 0 ? 0 : slice_copy.start_;
1029 slice_copy.stop_ = slice_copy.stop_ < 0 ? 0 : slice_copy.stop_;
1030 dsize_t max_idx = shape_[slice_index];
1031 slice_copy.start_ = slice_copy.start_ > max_idx ? max_idx : slice_copy.start_;
1032 slice_copy.stop_ = slice_copy.stop_ > max_idx ? max_idx : slice_copy.stop_;
1033 *slice_option_ptr = SliceOption(slice_copy);
1034 } else {
1035 // indices validation
1036 std::vector<dsize_t> indices_copy;
1037 for (int j = 0; j < slice_option.indices_.size(); j++) {
1038 dsize_t index = HandleNeg(slice_option.indices_[j], shape_[slice_index]);
1039 CHECK_FAIL_RETURN_UNEXPECTED(index < shape_[slice_index] && index >= 0,
1040 "Index " + std::to_string(index) + " is out of bounds.");
1041 indices_copy.emplace_back(index);
1042 }
1043 *slice_option_ptr = SliceOption(indices_copy);
1044 }
1045 return Status::OK();
1046 }
1047
// Produce a new tensor containing the elements selected by `slice_options_`
// (one SliceOption per leading dimension; missing trailing dimensions are
// taken whole). Dispatches to SliceNumeric or SliceString once the output
// shape and the full list of selected indices have been computed.
Status Tensor::Slice(std::shared_ptr<Tensor> *out, const std::vector<SliceOption> slice_options_) {
  RETURN_UNEXPECTED_IF_NULL(out);
  std::vector<SliceOption> converted_slice_objects;

  CHECK_FAIL_RETURN_UNEXPECTED(slice_options_.size() <= static_cast<size_t>(std::numeric_limits<dsize_t>::max()),
                               "The size of slice_options_ must not be more than \"INT64_MAX\".");
  for (size_t k = 0; k < slice_options_.size(); k++) {
    SliceOption slice_option = slice_options_[k];

    // "all_" means take every element of dimension k: synthesize a full-range slice.
    if (slice_option.all_) {
      mindspore::dataset::Slice slice = mindspore::dataset::Slice(shape_[static_cast<dsize_t>(k)]);
      converted_slice_objects.push_back(SliceOption(slice));
      continue;
    }

    CHECK_FAIL_RETURN_UNEXPECTED(k <= static_cast<size_t>(std::numeric_limits<int32_t>::max()),
                                 "GetSliceOption() can't function properly if there are "
                                 "more than \"INT32_MAX\" slice options");
    SliceOption slice_option_item(false);
    // Normalize negative/out-of-range bounds or validate explicit indices.
    RETURN_IF_NOT_OK(GetSliceOption(slice_option, static_cast<int32_t>(k), &slice_option_item));
    converted_slice_objects.emplace_back(slice_option_item);
  }

  // partial slices, pass in the rest
  if (slice_options_.size() != Rank()) {
    for (dsize_t j = static_cast<dsize_t>(slice_options_.size()); j < Rank(); j++) {
      mindspore::dataset::Slice slice = mindspore::dataset::Slice(0, shape_[j]);
      converted_slice_objects.emplace_back(SliceOption(slice));
    }
  }

  // determine final shape:
  TensorShape t = TensorShape({});
  dsize_t slice_len = slice_options_.size();
  dsize_t slice_len_ind;
  for (int i = 0; i < shape_.Rank(); i++) {
    if (i < slice_len) {
      // if it's a slice
      if (converted_slice_objects[i].indices_.size() == 0 && converted_slice_objects[i].slice_.step_ != 0) {
        // ceil((stop - start) / step): size of the range produced by the slice.
        slice_len_ind = (converted_slice_objects[i].slice_.stop_ - converted_slice_objects[i].slice_.start_) /
                        converted_slice_objects[i].slice_.step_;
        if ((converted_slice_objects[i].slice_.stop_ - converted_slice_objects[i].slice_.start_) %
              converted_slice_objects[i].slice_.step_ !=
            0) {
          slice_len_ind++;
        }
        // account for slices that would return no data
        slice_len_ind = slice_len_ind < 0 ? 0 : slice_len_ind;
        t = t.AppendDim(slice_len_ind);
      } else {
        // if its a vector of indices
        // need to introduce a way of handling indices and slices
        if (converted_slice_objects[i].indices_.size() >= 1) {
          t = t.AppendDim(converted_slice_objects[i].indices_.size());
        }
      }
    } else {
      // add in the rest of the dimensions
      slice_len_ind = shape_[i];
      t = t.AppendDim(slice_len_ind);
    }
  }

  // Expand the per-dimension options into the concrete multi-dimensional indices to copy.
  std::vector<std::vector<dsize_t>> indices_vector = IndexGenerator(converted_slice_objects);

  if (indices_vector.empty()) {
    return CreateEmpty(t, type_, out);
  }
  if (type_.IsNumeric()) {
    return SliceNumeric(out, indices_vector, t);
  } else {
    return SliceString(out, indices_vector, t);
  }
}
1122
// Copy the numeric elements selected by `indices` into a new tensor of shape
// `shape`. Consecutive source indices that are contiguous in memory are
// coalesced into a single memcpy to minimize the number of copies.
Status Tensor::SliceNumeric(std::shared_ptr<Tensor> *out, const std::vector<std::vector<dsize_t>> &indices,
                            const TensorShape &shape) {
  RETURN_UNEXPECTED_IF_NULL(out);
  RETURN_IF_NOT_OK(CreateEmpty(shape, type_, out));

  RETURN_UNEXPECTED_IF_NULL(out);
  (*out)->GetMutableBuffer();
  dsize_t out_index = 0;
  std::vector<dsize_t> dim_length = shape_.AsVector();
  dsize_t type_size = type_.SizeInBytes();
  // src_start tracks the beginning of the current contiguous run in the source.
  std::vector<dsize_t> src_start = HandleNegIndices(indices[0], dim_length);
  dsize_t src_start_index;
  RETURN_IF_NOT_OK(shape_.ToFlatIndex(src_start, &src_start_index));

  uchar *dst_addr = (*out)->data_;
  dsize_t count = 1;  // number of coalesced index entries in the current run

  // to handle partial slices
  dsize_t current_stride = shape_.Strides()[indices[0].size() - 1];
  dsize_t indices_size = static_cast<dsize_t>(indices.size());
  for (dsize_t i = 0; i < indices_size; i++) {
    std::vector<dsize_t> cur_index = HandleNegIndices(indices[i], dim_length);
    if (i < indices_size - 1) {
      std::vector<dsize_t> next_index = HandleNegIndices(indices[i + 1], dim_length);
      dsize_t flat_idx_curr;
      dsize_t flat_idx_next;

      RETURN_IF_NOT_OK(shape_.ToFlatIndex(cur_index, &flat_idx_curr));
      RETURN_IF_NOT_OK(shape_.ToFlatIndex(next_index, &flat_idx_next));

      // If the next selected element directly follows this one in memory,
      // extend the current run instead of copying now.
      if (flat_idx_next == flat_idx_curr + current_stride) {
        count++;
        continue;
      }
    }

    // Flush the accumulated contiguous run with one memcpy.
    int return_code = memcpy_s(dst_addr + out_index * type_size, (*out)->SizeInBytes(),
                               data_ + src_start_index * type_size, count * type_size * current_stride);
    CHECK_FAIL_RETURN_UNEXPECTED(return_code == 0, "memcpy_s failed in SliceNumeric");
    out_index += count * current_stride;
    if (i < indices_size - 1) {
      src_start = HandleNegIndices(indices[i + 1], dim_length);  // next index
      RETURN_IF_NOT_OK(shape_.ToFlatIndex(src_start, &src_start_index));
    }
    count = 1;
  }
  return Status::OK();
}
SliceString(std::shared_ptr<Tensor> * out,const std::vector<std::vector<dsize_t>> & indices,const TensorShape & shape)1171 Status Tensor::SliceString(std::shared_ptr<Tensor> *out, const std::vector<std::vector<dsize_t>> &indices,
1172 const TensorShape &shape) {
1173 RETURN_UNEXPECTED_IF_NULL(out);
1174 std::vector<dsize_t> dim_length = shape_.AsVector();
1175 std::vector<std::string> strings;
1176
1177 for (std::vector<dsize_t> index : indices) {
1178 std::vector<dsize_t> cur_index = HandleNegIndices(index, dim_length);
1179 dsize_t cur_flat_index;
1180 RETURN_IF_NOT_OK(shape_.ToFlatIndex(cur_index, &cur_flat_index));
1181 std::string_view sv;
1182 RETURN_IF_NOT_OK(GetItemAt(&sv, {cur_index}));
1183 strings.emplace_back(sv);
1184 }
1185 return CreateFromVector(strings, shape, out);
1186 }
CreateFromMSTensor(const MSTensor & in,TensorPtr * out)1187 Status Tensor::CreateFromMSTensor(const MSTensor &in, TensorPtr *out) {
1188 if (in.Data().get() == nullptr) {
1189 *out = nullptr;
1190 return Status::OK();
1191 }
1192 return Tensor::CreateFromMemory(TensorShape(in.Shape()), MSTypeToDEType(static_cast<TypeId>(in.DataType())),
1193 (const uchar *)(in.Data().get()), in.DataSize(), out);
1194 }
1195
1196 } // namespace dataset
1197 } // namespace mindspore
1198