/**
 * Copyright 2020-2024 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "minddata/dataset/core/tensor.h"

#include <iomanip>
#include <iostream>
#include <fstream>
#include <functional>
#include <limits>
#include <memory>
#include <vector>
#include <utility>

#ifndef ENABLE_ANDROID
#include "minddata/dataset/core/cv_tensor.h"
#endif
#include "minddata/dataset/core/global_context.h"
#ifdef ENABLE_PYTHON
#include "minddata/dataset/core/pybind_support.h"
#endif
#include "minddata/dataset/core/tensor_shape.h"
#include "minddata/dataset/core/type_id.h"
#include "minddata/dataset/include/dataset/constants.h"
#include "minddata/dataset/util/validators.h"
#include "utils/ms_utils.h"

#ifdef ENABLE_PYTHON
namespace py = pybind11;
#endif

namespace mindspore {
namespace dataset {
// Helper macros for printing tensor elements
#define CASE_PRINT(de_type, native_type)      \
  case de_type: {                             \
    native_type o;                            \
    rc = GetItemAt<native_type>(&o, index);   \
    out << o;                                 \
    break;                                    \
  }

#define CASE_PRINT_HEX(de_type, native_type)                                                    \
  case de_type: {                                                                               \
    native_type o;                                                                              \
    rc = GetItemAt<native_type>(&o, index);                                                     \
    out << std::hex << std::setw(2) << std::setfill('0') << o << std::dec << std::setfill(' '); \
    break;                                                                                      \
  }
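
// For illustration (a sketch of the preprocessor output, not part of the build):
// CASE_PRINT(DataType::DE_INT32, int32_t) expands to roughly
//   case DataType::DE_INT32: {
//     int32_t o;
//     rc = GetItemAt<int32_t>(&o, index);
//     out << o;
//     break;
//   }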

Tensor::Tensor(TensorShape shape, DataType type) : shape_(std::move(shape)), type_(type), data_(nullptr) {}

Tensor::Tensor(Tensor &&other) noexcept
    : shape_(std::move(other.shape_)), type_(other.type_), data_(other.data_), data_end_(other.data_end_) {
#ifdef ENABLE_PYTHON
  if (type_.value() == DataType::DE_PYTHON) {
    py::gil_scoped_acquire gil_acquire;
    python_dict_ = std::move(other.python_dict_);
  }
  // If other.python_array_ has a value, assign it to this->python_array_
  if (static_cast<bool>(other.python_array_)) {
    py::gil_scoped_acquire gil_acquire;
    python_array_ = (other.python_array_);
  }
#endif
  other.Invalidate();
}

Tensor &Tensor::operator=(Tensor &&other) noexcept {
  if (&other != this) {
    shape_ = std::move(other.shape_);
    type_ = other.type_;
    data_ = other.data_;
    data_end_ = other.data_end_;
    yuv_shape_ = std::move(other.yuv_shape_);
#ifdef ENABLE_PYTHON
    if (type_.value() == DataType::DE_PYTHON) {
      py::gil_scoped_acquire gil_acquire;
      python_dict_ = std::move(other.python_dict_);
    }
    // If other.python_array_ has a value, assign it to this->python_array_
    if (static_cast<bool>(other.python_array_)) {
      py::gil_scoped_acquire gil_acquire;
      python_array_ = (other.python_array_);
    }
#endif
    other.Invalidate();
  }
  return *this;
}

Status Tensor::CreateEmpty(const TensorShape &shape, const DataType &type, TensorPtr *out) {
  RETURN_UNEXPECTED_IF_NULL(out);
  CHECK_FAIL_RETURN_UNEXPECTED(shape.known(), "Failed to create empty tensor, tensor shape is unknown.");
  CHECK_FAIL_RETURN_UNEXPECTED(type != DataType::DE_UNKNOWN, "Failed to create empty tensor, data type is unknown.");
  *out = std::make_shared<Tensor>(shape, type);
  CHECK_FAIL_RETURN_UNEXPECTED(*out != nullptr, "Failed to create empty tensor, allocate memory failed.");
  // if it's a string tensor and it has no elements, just initialize the shape and type.
  if (!type.IsNumeric()) {
    if (shape.NumOfElements() == 0) {
      return Status::OK();
    } else {
      RETURN_STATUS_UNEXPECTED(
        "Failed to create empty tensor, number of elements should be 0 when data type is string.");
    }
  }

  int64_t byte_size = (*out)->SizeInBytes();

  // Don't allocate if we have a tensor with no elements.
  if (byte_size != 0) {
    RETURN_IF_NOT_OK((*out)->AllocateBuffer(byte_size));
  }
  return Status::OK();
}

Status Tensor::CreateFromMemory(const TensorShape &shape, const DataType &type, const uchar *src, TensorPtr *out) {
  RETURN_IF_NOT_OK(CreateEmpty(shape, type, out));
  if (src != nullptr && out != nullptr) {
    // Given the shape/type of this tensor, compute the data size and copy in the input bytes.
    int64_t byte_size = (*out)->SizeInBytes();
    if (byte_size == 0) {
      return Status::OK();
    }
    std::string err_msg =
      "Failed to copy data into tensor. If GeneratorDataset(source=Pyfunc, ...) or map(operations=Pyfunc, ...) is "
      "used, please check whether the memory of the "
      "Numpy object returned by Pyfunc has been unexpectedly freed. Adding copy.deepcopy(numpy_object) before the "
      "numpy_object is returned by Pyfunc may solve the issue. For more details, please refer to the FAQ at "
      "https://www.mindspore.cn/docs/en/master/faq/data_processing.html.";
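    // the bounded memcpy_s cannot be used for very large buffers (SECUREC_MEM_MAX_LEN bytes
    // or more), so those fall back to plain std::memcpy below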
    if (byte_size < SECUREC_MEM_MAX_LEN) {
      int ret_code = memcpy_s((*out)->data_, byte_size, src, byte_size);
      CHECK_FAIL_RETURN_UNEXPECTED(ret_code == EOK, err_msg);
    } else {
      auto ret_code = std::memcpy((*out)->data_, src, byte_size);
      CHECK_FAIL_RETURN_UNEXPECTED(ret_code == (*out)->data_, err_msg);
    }
  }
  return Status::OK();
}

Status Tensor::CreateFromMemory(const TensorShape &shape, const DataType &type, const uchar *src, const dsize_t &length,
                                TensorPtr *out) {
  RETURN_UNEXPECTED_IF_NULL(out);
  *out = std::make_shared<Tensor>(shape, type);
  CHECK_FAIL_RETURN_UNEXPECTED(*out != nullptr, "Allocate memory failed.");
  if (type.IsNumeric()) {
    dsize_t calculated_length = (*out)->SizeInBytes();
    CHECK_FAIL_RETURN_UNEXPECTED(calculated_length == length, "Length of source data does not match the shape.");
  } else {
    // min_length is the length of a tensor with empty strings
    // min_length = the number of bytes needed to store the offsets + 1 byte for each element
    dsize_t min_length = (shape.NumOfElements() + 1) * kOffsetSize + shape.NumOfElements();
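    // e.g. 2 empty strings (a sketch, assuming kOffsetSize == 4):
    // (2 + 1) * 4 offset bytes + 2 null terminators = 14 bytes minimum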
    CHECK_FAIL_RETURN_UNEXPECTED(min_length <= length, "Length of source data does not match the shape.");
  }

  RETURN_IF_NOT_OK((*out)->AllocateBuffer(length));
  if (length == 0) {
    return Status::OK();
  }
  RETURN_UNEXPECTED_IF_NULL(src);  // needs to be here as we may return early without using src content (empty tensors)
  if (length < SECUREC_MEM_MAX_LEN) {
    int ret_code = memcpy_s((*out)->data_, length, src, length);
    CHECK_FAIL_RETURN_UNEXPECTED(ret_code == EOK, "Failed to copy data into tensor.");
  } else {
    auto ret_code = std::memcpy((*out)->data_, src, length);
    CHECK_FAIL_RETURN_UNEXPECTED(ret_code == (*out)->data_, "Failed to copy data into tensor.");
  }

  return Status::OK();
}

#ifdef ENABLE_PYTHON
Status Tensor::CreateFromNpString(py::array arr, std::shared_ptr<Tensor> *out) {
  RETURN_UNEXPECTED_IF_NULL(out);
  std::vector<dsize_t> shape;
  for (size_t i = 0; i < arr.ndim(); i++) {
    shape.push_back(static_cast<dsize_t>(arr.shape()[i]));
  }
  arr.resize({arr.size()});  // flatten the py::array so we can iterate once
  std::vector<std::string> strings;
  strings.reserve(arr.size());
  (void)std::for_each(arr.begin(), arr.end(),
                      [&strings](const auto &s) { strings.emplace_back(py::cast<py::str>(s)); });
  arr.resize(shape);  // resize arr back to the original shape

  if (arr.dtype().kind() == 'U') {  // numpy dtype kind "U": unicode strings
    RETURN_IF_NOT_OK(CreateFromVector(strings, TensorShape{shape}, DataType(DataType::DE_STRING), out));
  } else {  // numpy dtype kind "S": byte strings
    RETURN_IF_NOT_OK(CreateFromVector(strings, TensorShape{shape}, DataType(DataType::DE_BYTES), out));
  }

  return Status::OK();
}

Status Tensor::CreateFromNpArray(py::array arr, std::shared_ptr<Tensor> *out) {
  RETURN_UNEXPECTED_IF_NULL(out);
  DataType type = DataType::FromNpArray(arr);
  CHECK_FAIL_RETURN_UNEXPECTED(type != DataType::DE_UNKNOWN,
                               "Failed to create tensor from numpy array, data type is unknown.");

  if (type.IsString()) {
    return CreateFromNpString(arr, out);
  }

  std::vector<dsize_t> shape;
  std::vector<dsize_t> strides;
  // check if strides are contiguous
  bool is_strided = false;
  dsize_t count = arr.size();
  for (dsize_t i = 0; i < arr.ndim(); i++) {
    shape.push_back(static_cast<dsize_t>(arr.shape()[i]));
    strides.push_back(static_cast<dsize_t>(arr.strides()[i]));
    // in case of an empty array, num_items = 0
    if (count != 0 && shape.size() > i && shape[i] != 0) {
      count /= shape[i];
      if (strides[i] != arr.itemsize() * count) {
        is_strided = true;
      }
    }
  }

  if (is_strided) {
    unsigned char *data = static_cast<unsigned char *>(arr.request().ptr);
    RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape(shape), type, out));
    RETURN_IF_NOT_OK(CopyStridedArray((*out)->data_, data, shape, strides, (*out)->type_.SizeInBytes()));
  } else {
#ifdef ENABLE_PYTHON
    // here we create an empty tensor and let this->python_array_ point at the data of the np.ndarray
    *out = std::make_shared<Tensor>(TensorShape(shape), type);
    {
      py::gil_scoped_acquire gil_acquire;
      (*out)->python_array_ = arr;
    }
    unsigned char *data = static_cast<unsigned char *>((*out)->python_array_.request().ptr);
    int64_t byte_size = (*out)->SizeInBytes();
    if (byte_size == 0) {
      return Status::OK();
    }
    (*out)->data_ = data;
    (*out)->data_end_ = data + byte_size;
#else
    RETURN_IF_NOT_OK(Tensor::CreateFromMemory(TensorShape(shape), type, data, out));
#endif
  }
  return Status::OK();
}

Status Tensor::CreateFromPythonObject(py::object obj, std::shared_ptr<Tensor> *out) {
  RETURN_UNEXPECTED_IF_NULL(out);
  std::vector<dsize_t> shape{};
  DataType type = DataType(DataType::DE_PYTHON);
  *out = std::make_shared<Tensor>(TensorShape({0}), type);
  {
    py::gil_scoped_acquire gil_acquire;
    (*out)->python_dict_ = obj;

    // serialize the python object to bytes, which is used by the dataset independent-process mode
    (*out)->python_dict_as_str_ = py::str(py::module::import("pickle").attr("dumps")((*out)->python_dict_));
    (*out)->data_ = reinterpret_cast<unsigned char *>((*out)->python_dict_as_str_.data());
    (*out)->data_end_ = (*out)->data_ + (*out)->python_dict_as_str_.length();
  }
  CHECK_FAIL_RETURN_UNEXPECTED(*out != nullptr, "Failed to create a tensor for python object.");
  return Status::OK();
}

#endif

#ifndef ENABLE_ANDROID
Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const TensorShape &shape, TensorPtr *out) {
  RETURN_UNEXPECTED_IF_NULL(out);
  *out = std::make_shared<Tensor>(TensorShape({static_cast<dsize_t>(bytes_list.value_size())}),
                                  DataType(DataType::DE_STRING));
  CHECK_FAIL_RETURN_UNEXPECTED(*out != nullptr, "Allocate memory failed.");
  // total bytes needed = offset array + strings
  // offset array needs to store one offset var per element + 1 extra to get the length of the last string.
  // strings will be null-terminated --> need 1 extra byte per element
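  // Layout illustration for {"ab", "c"} (a sketch, assuming 4-byte offset_t):
  //   bytes 0..11:  offsets [12, 15, 17] (one entry per string + 1 extra)
  //   bytes 12..16: 'a' 'b' '\0' 'c' '\0'
  // so length of string i = offset_arr[i + 1] - offset_arr[i] - 1, excluding the terminator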
  dsize_t num_bytes = (kOffsetSize) * (*out)->shape_.NumOfElements() + kOffsetSize + bytes_list.ByteSizeLong();

  (*out)->data_ = GetAllocator()->allocate(num_bytes);

  auto offset_arr = reinterpret_cast<offset_t *>((*out)->data_);
  uchar *buf = (*out)->GetStringsBuffer();

  offset_t offset = buf - (*out)->data_;  // the first string will start here
  int32_t i = 0;
  for (; i < bytes_list.value_size(); i++) {
    const std::string &str = bytes_list.value(i);
    // insert the start index of the string.
    offset_arr[i] = offset;
    // total bytes are reduced by kOffsetSize
    num_bytes -= kOffsetSize;
    // insert actual string
    int ret_code = memcpy_s((*out)->data_ + offset, num_bytes, common::SafeCStr(str), str.length() + 1);
    CHECK_FAIL_RETURN_UNEXPECTED(ret_code == EOK, "Cannot copy string into Tensor");
    // next string will be stored right after the current one.
    offset = offset + str.length() + 1;
    // total bytes are reduced by the length of the string
    num_bytes -= str.length() + 1;
  }
  // store one more offset value so we can get the length of the last string
  // length[last_element] = offset_arr[last_element + 1] - offset_arr[last_element]
  offset_arr[i] = offset;

  (*out)->data_end_ = (*out)->data_ + offset_arr[i];

  MS_ASSERT(num_bytes == 0);
  RETURN_IF_NOT_OK((*out)->Reshape(shape));
  return Status::OK();
}
#endif

Status Tensor::CreateFromFile(const std::string &path, std::shared_ptr<Tensor> *out) {
  RETURN_UNEXPECTED_IF_NULL(out);
  Path file(path);
  if (file.IsDirectory()) {
    RETURN_STATUS_UNEXPECTED("Invalid file found: " + path + ", should be file, but got directory.");
  }
  std::ifstream fs;
  fs.open(path, std::ios::binary | std::ios::in);
  CHECK_FAIL_RETURN_UNEXPECTED(!fs.fail(), "Failed to open file: " + path);
  int64_t num_bytes = fs.seekg(0, std::ios::end).tellg();
  if (num_bytes >= kDeMaxDim) {
    fs.close();
    RETURN_STATUS_UNEXPECTED("Invalid file to allocate tensor memory, check path: " + path);
  }
  if (!fs.seekg(0, std::ios::beg).good()) {
    fs.close();
    RETURN_STATUS_UNEXPECTED("Failed to find size of file, check path: " + path);
  }
  auto s = Tensor::CreateEmpty(TensorShape{num_bytes}, DataType(DataType::DE_UINT8), out);
  if (s != Status::OK()) {
    fs.close();
    return s;
  }
  int64_t written_bytes = fs.read(reinterpret_cast<char *>((*out)->GetMutableBuffer()), num_bytes).gcount();
  if (!(written_bytes == num_bytes && fs.good())) {
    fs.close();
    RETURN_STATUS_UNEXPECTED("Error in writing to tensor, check path: " + path);
  }
  fs.close();
  return Status::OK();
}

#ifndef ENABLE_ANDROID
Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const TensorShape &shape,
                                  const DataType &type, dsize_t pad_size, TensorPtr *out) {
  RETURN_UNEXPECTED_IF_NULL(out);
  RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, type, out));

  RETURN_UNEXPECTED_IF_NULL(out);
  unsigned char *current_tensor_addr = (*out)->GetMutableBuffer();
  int64_t tensor_bytes_remaining = bytes_list.value_size() * pad_size;

  for (int i = 0; i < bytes_list.value_size(); i++) {
    // read string data into tensor
    const std::string &current_element = bytes_list.value(i);
    int return_code =
      memcpy_s(current_tensor_addr, tensor_bytes_remaining, common::SafeCStr(current_element), current_element.size());

    CHECK_FAIL_RETURN_UNEXPECTED(return_code == EOK, "memcpy_s failed when reading bytesList element into Tensor");

    current_tensor_addr += current_element.size();
    tensor_bytes_remaining -= current_element.size();

    // pad
    int64_t chars_to_pad = pad_size - current_element.size();
    return_code = memset_s(current_tensor_addr, tensor_bytes_remaining, static_cast<int>(' '), chars_to_pad);
    CHECK_FAIL_RETURN_UNEXPECTED(return_code == EOK, "memset_s failed when padding Tensor");

    current_tensor_addr += chars_to_pad;
    tensor_bytes_remaining -= chars_to_pad;
  }

  return Status::OK();
}
#endif

// Memcpy the given strided array's used part to consecutive memory
// Consider a 3-d array
// A[(i * shape[1] + j) * shape[2] + k] = B[i][j][k] = C[i * strides[0] + j * strides[1] + k * strides[2]]
// Here we convert array C to array A, by memcpy index by index (Note that not all elements in C are copied)
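// Worked example (a sketch): a column-major 2x3 view with itemsize 1 has shape = {2, 3} and
// strides = {1, 2}; element B[1][2] lives at C[1 * 1 + 2 * 2] = C[5] and is copied to
// A[1 * 3 + 2] = A[5], while B[1][0] lives at C[1] and is copied to A[3].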
Status Tensor::CopyStridedArray(unsigned char *dst, unsigned char *src, std::vector<dsize_t> shape,
                                std::vector<dsize_t> strides, uint8_t type_size) {
  RETURN_UNEXPECTED_IF_NULL(dst);
  RETURN_UNEXPECTED_IF_NULL(src);
  dsize_t size = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<>());
  for (dsize_t i = 0; i < size; ++i) {
    dsize_t offset = 0;
    dsize_t count = i;
    for (size_t j = 0; j < shape.size(); ++j) {
      // convert the 1d array's index to the 3d array's index (A -> B)
      CHECK_FAIL_RETURN_UNEXPECTED(shape[shape.size() - 1 - j] != 0, "Invalid data, shape can't be zero.");
      dsize_t idx = count % shape[shape.size() - 1 - j];
      count /= shape[shape.size() - 1 - j];
      // calculate the raw data offset based on strides (B -> C)
      offset += idx * strides[shape.size() - 1 - j];
      // once count = 0, the remaining indices are all zero, skip them
      if (count == 0) {
        break;
      }
    }
    // strides already consider the byte size of the data type, but dst doesn't.
    // dst[i] = dst + i * type_size = src + offset
    int ret_code = memcpy_s(dst + i * type_size, type_size, src + offset, type_size);
    if (ret_code != EOK) {
      RETURN_STATUS_UNEXPECTED("Failed to copy data into Tensor.");
    }
  }
  return Status::OK();
}

// Name: Destructor
// Description: Destructor
Tensor::~Tensor() {
#ifdef ENABLE_PYTHON
  if (!static_cast<bool>(python_array_)) {  // the data is not an np.ndarray from the python layer
#endif
    if (!type().IsPython()) {  // the Tensor does not hold a python object
      if (data_ != nullptr) {
        if (GetAllocator() != nullptr) {
          GetAllocator()->deallocate(data_);
          data_ = nullptr;
          data_end_ = nullptr;
        } else {
          // If we didn't have an allocator, but data_ is not null then it must
          // be a stand-alone tensor that used malloc directly.
          free(data_);
          data_ = nullptr;
          data_end_ = nullptr;
        }
      }
    }
#ifdef ENABLE_PYTHON
  } else {
    // release the data from the python layer
    py::gil_scoped_acquire gil_acquire;
    python_array_ = py::none();  // decrease the borrowed python ndarray's ref count by 1
  }
#endif
#ifdef ENABLE_PYTHON
  try {
    // The default destructor will not acquire the Python GIL when it destructs
    // the class members, so we need to handle py::object manually.
    if (Py_IsInitialized() > 0) {
      if (static_cast<bool>(python_dict_)) {
        // Acquire Python GIL
        py::gil_scoped_acquire gil_acquire;
        // We need to reduce the reference count of the py::object to which python_dict_
        // refers by 1, then break that reference relationship, otherwise the default
        // destructor will destruct that py::object again while recycling the class member
        // python_dict_. A simple assignment to None satisfies all of the above.
        python_dict_ = py::none();
      }
    }
  } catch (const py::error_already_set &e) {
    // ignore exceptions as everything could be shutting down at this point
  }
#endif
}

bool Tensor::operator==(const Tensor &rhs) const {
#ifdef ENABLE_PYTHON
  if (type_.value() == DataType::DE_PYTHON) {  // we are holding a python object
    if (static_cast<bool>(python_dict_) && static_cast<bool>(rhs.python_dict_) && python_dict_ == rhs.python_dict_) {
      return true;
    }
    return false;
  }
#endif
  // 1. different shape 2. different type 3. one data_ is nullptr and the other is not
  if (shape_ != rhs.shape() || type_ != rhs.type_ || (data_ == nullptr && rhs.data_ != nullptr) ||
      (data_ != nullptr && rhs.data_ == nullptr)) {
    return false;
  }
  if (data_ == nullptr && rhs.data_ == nullptr) {
    return true;
  }
  // use mem compare on the two data buffers; sizes were already verified above
  return memcmp(data_, rhs.data_, SizeInBytes()) == 0;
}

// Name: PrintItemAt()
// Description: A function that prints the value as specified by its index
void Tensor::PrintItemAt(const std::vector<dsize_t> &index, std::ostream &out) const {
  Status rc;
  MS_ASSERT(data_);

  switch (type_.value()) {
    CASE_PRINT_HEX(DataType::DE_BOOL, bool)

    CASE_PRINT_HEX(DataType::DE_INT8, int8_t)

    CASE_PRINT_HEX(DataType::DE_UINT8, uint8_t)

    CASE_PRINT(DataType::DE_INT16, int16_t)

    CASE_PRINT(DataType::DE_UINT16, uint16_t)

    CASE_PRINT(DataType::DE_INT32, int32_t)

    CASE_PRINT(DataType::DE_UINT32, uint32_t)

    CASE_PRINT(DataType::DE_INT64, int64_t)

    CASE_PRINT(DataType::DE_UINT64, uint64_t)

    CASE_PRINT(DataType::DE_FLOAT16, float16)

    CASE_PRINT(DataType::DE_FLOAT32, float)

    CASE_PRINT(DataType::DE_FLOAT64, double)

    case DataType::DE_STRING: {
      std::string_view o{""};
      rc = GetItemAt(&o, index);
      out << "\"" << o << "\"";
      break;
    }
    default: {
      out << "?";
      break;
    }
  }
  if (rc.IsError()) {
    out << rc.ToString();
  }
}

// Name: PrintRecursive()
// Description: A function that prints the Tensor recursively, first called by Print()
void Tensor::PrintRecursive(std::ostream &out, int32_t cur_dim, const std::vector<dsize_t> &cur_index) const {
  if (cur_index.size() == shape_.Rank()) {
    PrintItemAt(cur_index, out);
  } else {
    out << "[";
    for (dsize_t i = 0; i < shape_[cur_dim]; i++) {
      std::vector<dsize_t> new_index = cur_index;
      new_index.push_back(i);
      PrintRecursive(out, cur_dim + 1, new_index);
      if (i < shape_[cur_dim] - 1) {
        out << ",";
      }
    }
    out << "]";
  }
}

// Name: Print()
// Description: A function that prints info about the tensor
void Tensor::Print(std::ostream &out) const {
  out << "Tensor (shape: ";
  out << shape_;
  out << ", Type: " << type_ << ")\n";
  if (data_) {
    PrintRecursive(out, 0, std::vector<dsize_t>{});
#ifdef ENABLE_PYTHON
  } else if (static_cast<bool>(python_dict_)) {
    std::string s;
    {
      py::gil_scoped_acquire gil_acquire;
      s = py::str(python_dict_);
    }
    out << s;
#endif
  } else {
    out << "[Data area is null]";
  }
}

void Tensor::PrintData(std::ostream &out) const {
  if (data_) {
    PrintRecursive(out, 0, std::vector<dsize_t>{});
  }
}

Status Tensor::AllocateBuffer(const dsize_t &length) {
  RETURN_UNEXPECTED_IF_NULL(GetAllocator());
  if (data_ == nullptr) {
    data_ = GetAllocator()->allocate(length);
    CHECK_FAIL_RETURN_UNEXPECTED(data_ != nullptr, "Failed to allocate memory for tensor.");
    data_end_ = data_ + length;
  }
  return Status::OK();
}

Status Tensor::Reshape(const TensorShape &shape) {
  if (shape.NumOfElements() == shape_.NumOfElements()) {
    shape_ = shape;
    return Status::OK();
  } else {
    std::string err = "Cannot reshape, number of elements does not match";
    RETURN_STATUS_UNEXPECTED(err);
  }
}

void Tensor::Invalidate() {
#ifdef ENABLE_PYTHON
  // release the python objects while type_ still reflects the tensor's contents;
  // resetting type_ first would make the DE_PYTHON check below always false
  if (type_.value() == DataType::DE_PYTHON) {
    py::gil_scoped_acquire gil_acquire;
    python_dict_ = py::none();
  }
  if (static_cast<bool>(python_array_)) {
    py::gil_scoped_acquire gil_acquire;
    python_array_ = py::none();  // decrease the borrowed python ndarray's ref count by 1
  }
#endif
  shape_ = TensorShape::CreateUnknownRankShape();
  type_ = DataType(DataType::DE_UNKNOWN);
  data_ = nullptr;
  data_end_ = nullptr;
}

template <typename T>
Status Tensor::GetItemPtr(T **ptr, const std::vector<dsize_t> &index) const {
  RETURN_UNEXPECTED_IF_NULL(ptr);
  if (type_.IsCompatible<T>()) {
    if (data_ == nullptr) {
      std::string err = "Data is not allocated yet";
      RETURN_STATUS_UNEXPECTED(err);
    }
    dsize_t flat_idx;
    RETURN_IF_NOT_OK(shape_.ToFlatIndex(index, &flat_idx));
    *ptr = reinterpret_cast<T *>(data_ + flat_idx * type_.SizeInBytes());
    RETURN_UNEXPECTED_IF_NULL(*ptr);

    return Status::OK();
  } else {
    std::string err = "data type not compatible";
    RETURN_STATUS_UNEXPECTED(err);
  }
}

Status Tensor::GetItemPtr(uchar **ptr, const std::vector<dsize_t> &index, offset_t *length) const {
  RETURN_UNEXPECTED_IF_NULL(ptr);
  RETURN_UNEXPECTED_IF_NULL(length);
  if (type_.IsString()) {
    if (data_ == nullptr) {
      std::string err = "Data is not allocated yet";
      RETURN_STATUS_UNEXPECTED(err);
    }
    dsize_t flat_idx;
    RETURN_IF_NOT_OK(shape_.ToFlatIndex(index, &flat_idx));
    offset_t length_temp = 0;
    RETURN_IF_NOT_OK(GetStringAt(flat_idx, ptr, &length_temp));
    *length = length_temp;
    return Status::OK();
  } else {
    std::string err = "data type not compatible";
    RETURN_STATUS_UNEXPECTED(err);
  }
}

Status Tensor::StartAddrOfIndex(std::vector<dsize_t> ind, uchar **start_addr_of_index, TensorShape *remaining) {
  RETURN_UNEXPECTED_IF_NULL(start_addr_of_index);
  RETURN_UNEXPECTED_IF_NULL(remaining);
  if (type().IsString()) {
    RETURN_STATUS_UNEXPECTED("StartAddrOfIndex does not support string and bytes tensors yet.");
  }

  dsize_t flat_ind;
  std::vector<dsize_t> t_shape = shape().AsVector();
  std::vector<dsize_t> r(t_shape.begin() + ind.size(), t_shape.end());
  *remaining = TensorShape(r);
  ind.resize(this->Rank(), 0);  // same as -> while (ind.size() < this->Rank()) ind.push_back(0);

  RETURN_IF_NOT_OK(shape_.ToFlatIndex(ind, &flat_ind));
  // check if GetBuffer() returns null; we should flag this as an error. This sanity check will only
  // be true if the tensor failed to allocate memory.
  if (GetMutableBuffer() == nullptr) {
    RETURN_STATUS_UNEXPECTED("Invalid GetBuffer in Tensor, got nullptr");
  }
  *start_addr_of_index = GetMutableBuffer() + flat_ind * this->type().SizeInBytes();
  return Status::OK();
}

Status Tensor::InsertTensor(const std::vector<dsize_t> &ind, const std::shared_ptr<Tensor> &tensor,
                            const bool partial_insert) {
  RETURN_UNEXPECTED_IF_NULL(tensor);
  std::string err_msg;
  if (partial_insert) {
    err_msg += (ind.size() != 1)
                 ? "[Tensor] only supports 1D insertion of elements not along the full length of the axis\n"
                 : "";
    err_msg +=
      (ind.at(0) + tensor->shape().NumOfElements() > shape().NumOfElements()) ? "[Tensor] incorrect index\n" : "";
  } else {
    err_msg += (ind.size() + tensor->Rank() != Rank()) ? "[Tensor] incorrect index\n" : "";
  }
  err_msg += (type().IsString()) ? "[Tensor] Cannot insert into a tensor of type string or bytes\n" : "";
  err_msg += (!shape().known() || !tensor->shape().known()) ? "[Tensor] unknown shape\n" : "";

  err_msg += tensor->type().SizeInBytes() != type().SizeInBytes() ? "[Tensor] incorrect datatype\n" : "";
  uchar *start_addr_of_ind = nullptr;
  if (partial_insert) {
    TensorShape remaining_shape = tensor->shape();
    err_msg +=
      (!StartAddrOfIndex(ind, &start_addr_of_ind, &remaining_shape).IsOk()) ? "[Tensor] incorrect index\n" : "";
  } else {
    TensorShape remaining_shape = TensorShape::CreateUnknownRankShape();
    err_msg +=
      (!StartAddrOfIndex(ind, &start_addr_of_ind, &remaining_shape).IsOk()) ? "[Tensor] incorrect index\n" : "";
    err_msg += !(remaining_shape == tensor->shape()) ? "[Tensor] memory error\n" : "";
  }

  if (!err_msg.empty()) {
    MS_LOG(DEBUG) << "Insert tensor message: " << err_msg;
    RETURN_STATUS_UNEXPECTED(err_msg);
  } else {
    if (start_addr_of_ind != nullptr) {
      int ret_code =
        memcpy_s(start_addr_of_ind, tensor->SizeInBytes(), tensor->GetMutableBuffer(), tensor->SizeInBytes());
      if (ret_code == EOK) {
        return Status::OK();
      } else {
        err_msg += "[Tensor] error in memcpy_s when inserting tensor\n";
        MS_LOG(DEBUG) << "Tensor message: " << err_msg;
        RETURN_STATUS_UNEXPECTED(err_msg);
      }
    } else {
      RETURN_STATUS_UNEXPECTED("Failed to create memory for Tensor.");
    }
  }
}

Status Tensor::ExpandDim(const dsize_t &axis) {
  if (axis > Rank()) {
    std::string err = "Axis is out of bound";
    RETURN_STATUS_UNEXPECTED(err);
  }
  if (axis == Rank()) {
    shape_ = shape_.AppendDim(1);
  } else {
    shape_ = shape_.InsertDim(axis, 1);
  }
  return Status::OK();
}

std::vector<dsize_t> Tensor::Strides() const {
  std::vector<dsize_t> strides = shape_.Strides();
  uint8_t size = type_.SizeInBytes();
  (void)std::transform(strides.begin(), strides.end(), strides.begin(), [&size](const auto &c) { return c * size; });
  return strides;
}

#ifdef ENABLE_PYTHON
Status Tensor::GetBufferInfo(Tensor *t, py::buffer_info *out) {
  RETURN_UNEXPECTED_IF_NULL(t);
  RETURN_UNEXPECTED_IF_NULL(out);
  CHECK_FAIL_RETURN_UNEXPECTED(t->type().IsNumeric(), "Cannot use GetBufferInfo on tensor of strings or bytes.");

  std::string format_desc = t->type().GetPybindFormat();
  if (format_desc.empty()) {
    RETURN_STATUS_UNEXPECTED("Cannot convert DE type to pybind format");
  }
  *out = py::buffer_info(t->GetMutableBuffer(),   /* Pointer to buffer */
                         t->type().SizeInBytes(), /* Size of one scalar */
                         format_desc,             /* Python struct-style format descriptor */
                         t->Rank(),               /* Number of dimensions */
                         t->shape().AsVector(),   /* Buffer dimensions */
                         t->Strides());
  return Status::OK();
}
#endif

Status Tensor::to_json(nlohmann::json *out_json) {
  nlohmann::json args;
  args["shape"] = shape_.AsVector();
  args["type"] = type_.ToString();
  if (type_ == DataType::DE_BOOL) {
    RETURN_IF_NOT_OK(to_json_convert<bool>(&args));
  } else if (type_ == DataType::DE_INT8) {
    RETURN_IF_NOT_OK(to_json_convert<int8_t>(&args));
  } else if (type_ == DataType::DE_INT16) {
    RETURN_IF_NOT_OK(to_json_convert<int16_t>(&args));
  } else if (type_ == DataType::DE_INT32) {
    RETURN_IF_NOT_OK(to_json_convert<int32_t>(&args));
  } else if (type_ == DataType::DE_INT64) {
    RETURN_IF_NOT_OK(to_json_convert<int64_t>(&args));
  } else if (type_ == DataType::DE_UINT8) {
    RETURN_IF_NOT_OK(to_json_convert<uint8_t>(&args));
  } else if (type_ == DataType::DE_UINT16) {
    RETURN_IF_NOT_OK(to_json_convert<uint16_t>(&args));
  } else if (type_ == DataType::DE_UINT32) {
    RETURN_IF_NOT_OK(to_json_convert<uint32_t>(&args));
  } else if (type_ == DataType::DE_UINT64) {
    RETURN_IF_NOT_OK(to_json_convert<uint64_t>(&args));
  } else if (type_ == DataType::DE_FLOAT32) {
    RETURN_IF_NOT_OK(to_json_convert<float>(&args));
  } else if (type_ == DataType::DE_FLOAT64) {
    RETURN_IF_NOT_OK(to_json_convert<double>(&args));
  } else if (type_.IsString()) {
    std::vector<std::string> data_out;
    for (auto it = this->begin<std::string_view>(); it != this->end<std::string_view>(); ++it) {
      data_out.emplace_back(*it);
    }
    args["data"] = data_out;
  } else {
    return Status(StatusCode::kMDUnexpectedError, "Type is not supported for tensor");
  }
  *out_json = args;
  return Status::OK();
}

template <typename T>
Status Tensor::to_json_convert(nlohmann::json *args) {
  std::vector<T> data_out;
  for (auto it = this->begin<T>(); it != this->end<T>(); it++) {
    data_out.emplace_back(*it);
  }
  (*args)["data"] = data_out;
  return Status::OK();
}

Status Tensor::from_json(nlohmann::json op_params, std::shared_ptr<Tensor> *tensor) {
  RETURN_IF_NOT_OK(ValidateParamInJson(op_params, "shape", "Tensor"));
  RETURN_IF_NOT_OK(ValidateParamInJson(op_params, "type", "Tensor"));
  RETURN_IF_NOT_OK(ValidateParamInJson(op_params, "data", "Tensor"));
  std::string type = op_params["type"];
  std::vector<dsize_t> list = op_params["shape"];
  TensorShape shape = TensorShape(list);
  if (type == "bool") {
    RETURN_IF_NOT_OK(from_json_convert<bool>(op_params["data"], shape, tensor));
  } else if (type == "int8") {
    RETURN_IF_NOT_OK(from_json_convert<int8_t>(op_params["data"], shape, tensor));
  } else if (type == "int16") {
    RETURN_IF_NOT_OK(from_json_convert<int16_t>(op_params["data"], shape, tensor));
  } else if (type == "int32") {
    RETURN_IF_NOT_OK(from_json_convert<int32_t>(op_params["data"], shape, tensor));
  } else if (type == "int64") {
    RETURN_IF_NOT_OK(from_json_convert<int64_t>(op_params["data"], shape, tensor));
  } else if (type == "uint8") {
    RETURN_IF_NOT_OK(from_json_convert<uint8_t>(op_params["data"], shape, tensor));
  } else if (type == "uint16") {
    RETURN_IF_NOT_OK(from_json_convert<uint16_t>(op_params["data"], shape, tensor));
  } else if (type == "uint32") {
    RETURN_IF_NOT_OK(from_json_convert<uint32_t>(op_params["data"], shape, tensor));
  } else if (type == "uint64") {
    RETURN_IF_NOT_OK(from_json_convert<uint64_t>(op_params["data"], shape, tensor));
  } else if (type == "float32") {
    RETURN_IF_NOT_OK(from_json_convert<float>(op_params["data"], shape, tensor));
  } else if (type == "float64") {
    RETURN_IF_NOT_OK(from_json_convert<double>(op_params["data"], shape, tensor));
  } else if (type == "string") {
    RETURN_IF_NOT_OK(from_json_convert(op_params["data"], shape, DataType(DataType::DE_STRING), tensor));
  } else if (type == "bytes") {
    RETURN_IF_NOT_OK(from_json_convert(op_params["data"], shape, DataType(DataType::DE_BYTES), tensor));
  } else {
    return Status(StatusCode::kMDUnexpectedError, "Type is not supported for tensor");
  }
  return Status::OK();
}

template <typename T>
Status Tensor::from_json_convert(const nlohmann::json &json_data, const TensorShape &shape,
                                 std::shared_ptr<Tensor> *tensor) {
  std::vector<T> data = json_data;
  RETURN_IF_NOT_OK(CreateFromVector(data, shape, tensor));
  return Status::OK();
}

Status Tensor::from_json_convert(const nlohmann::json &json_data, const TensorShape &shape, const DataType &type,
                                 std::shared_ptr<Tensor> *tensor) {
  std::vector<std::string> data = json_data;
  RETURN_IF_NOT_OK(CreateFromVector(data, shape, type, tensor));
  return Status::OK();
}

template <typename T>
Status Tensor::GetItemAt(T *o, const std::vector<dsize_t> &index) const {
  RETURN_UNEXPECTED_IF_NULL(o);
  if (data_ == nullptr) {
    RETURN_STATUS_UNEXPECTED("Data is not allocated yet");
  }
  if (!type_.IsLooselyCompatible<T>()) {
    std::string err = "Template type and Tensor type are not compatible";
    RETURN_STATUS_UNEXPECTED(err);
  }
  if (type_.IsUnsignedInt()) {
    RETURN_IF_NOT_OK(GetUnsignedIntAt<T>(o, index));
  } else if (type_.IsSignedInt()) {
    RETURN_IF_NOT_OK(GetSignedIntAt<T>(o, index));
  } else if (type_.IsFloat()) {
    RETURN_IF_NOT_OK(GetFloatAt<T>(o, index));
  } else if (type_.IsBool()) {
    bool *ptr = nullptr;
    RETURN_IF_NOT_OK(GetItemPtr<bool>(&ptr, index));
    *o = static_cast<T>(*ptr);
  } else {
    std::string err = "Tensor Type is unknown";
    RETURN_STATUS_UNEXPECTED(err);
  }
  return Status::OK();
}

Status Tensor::GetItemAt(std::string_view *o, const std::vector<dsize_t> &index) const {
  RETURN_UNEXPECTED_IF_NULL(data_);
  RETURN_UNEXPECTED_IF_NULL(o);
  CHECK_FAIL_RETURN_UNEXPECTED(type_.IsString(), "Tensor type is not of string or bytes.");

  uchar *start = nullptr;
  offset_t length = 0;
  RETURN_IF_NOT_OK(GetItemPtr(&start, index, &length));
  std::string_view sv{reinterpret_cast<const char *>(start), length};
  o->swap(sv);
  return Status::OK();
}

#ifdef ENABLE_PYTHON
// return the tensor's data as a numpy array
Status Tensor::GetDataAsNumpy(py::array *data) {
  RETURN_UNEXPECTED_IF_NULL(data);
  if (type_ == DataType::DE_BOOL) {
    *data = py::array_t<bool>(shape_.AsVector(), reinterpret_cast<bool *>(data_));
  } else if (type_ == DataType::DE_INT8) {
    *data = py::array_t<int8_t>(shape_.AsVector(), reinterpret_cast<int8_t *>(data_));
  } else if (type_ == DataType::DE_INT16) {
    *data = py::array_t<int16_t>(shape_.AsVector(), reinterpret_cast<int16_t *>(data_));
  } else if (type_ == DataType::DE_INT32) {
    *data = py::array_t<int32_t>(shape_.AsVector(), reinterpret_cast<int32_t *>(data_));
  } else if (type_ == DataType::DE_INT64) {
    *data = py::array_t<int64_t>(shape_.AsVector(), reinterpret_cast<int64_t *>(data_));
  } else if (type_ == DataType::DE_UINT8) {
    *data = py::array_t<uint8_t>(shape_.AsVector(), reinterpret_cast<uint8_t *>(data_));
  } else if (type_ == DataType::DE_UINT16) {
    *data = py::array_t<uint16_t>(shape_.AsVector(), reinterpret_cast<uint16_t *>(data_));
  } else if (type_ == DataType::DE_UINT32) {
    *data = py::array_t<uint32_t>(shape_.AsVector(), reinterpret_cast<uint32_t *>(data_));
  } else if (type_ == DataType::DE_UINT64) {
    *data = py::array_t<uint64_t>(shape_.AsVector(), reinterpret_cast<uint64_t *>(data_));
  } else if (type_ == DataType::DE_FLOAT16) {
    *data = py::array_t<float16>(shape_.AsVector(), reinterpret_cast<float16 *>(data_));
  } else if (type_ == DataType::DE_FLOAT32) {
    *data = py::array_t<float>(shape_.AsVector(), reinterpret_cast<float *>(data_));
  } else if (type_ == DataType::DE_FLOAT64) {
    *data = py::array_t<double>(shape_.AsVector(), reinterpret_cast<double *>(data_));
  } else if (type_.IsString()) {
    RETURN_IF_NOT_OK(GetDataAsNumpyStrings(data));
  } else {
    RETURN_STATUS_UNEXPECTED("Got unexpected type when returning numpy");
  }
  return Status::OK();
}

Status Tensor::GetDataAsNumpyStrings(py::array *data) {
  RETURN_UNEXPECTED_IF_NULL(data);
  if (type_ == DataType::DE_STRING) {
    RETURN_IF_NOT_OK(GetDataAsNumpyStrings<py::str>(data));
  } else if (type_ == DataType::DE_BYTES) {
    RETURN_IF_NOT_OK(GetDataAsNumpyStrings<py::bytes>(data));
  } else {
    RETURN_STATUS_UNEXPECTED("Cannot convert a numeric Tensor to a string NumPy array.");
  }
  return Status::OK();
}

Status Tensor::GetDataAsPythonObject(py::dict *data) {
  RETURN_UNEXPECTED_IF_NULL(data);
  {
    py::gil_scoped_acquire gil_acquire;
    *data = python_dict_;
  }
  return Status::OK();
}
#endif

void Tensor::Squeeze() { shape_ = shape_.Squeeze(); }

template <typename T>
Status Tensor::GetUnsignedIntAt(T *o, const std::vector<dsize_t> &index) const {
  RETURN_UNEXPECTED_IF_NULL(o);
  if (data_ == nullptr) {
    RETURN_STATUS_UNEXPECTED("Data is not allocated yet");
  }
  if (!type_.IsLooselyCompatible<T>()) {
    std::string err = "Template type and Tensor type are not compatible";
    RETURN_STATUS_UNEXPECTED(err);
  }
  switch (type_.value()) {
    case DataType::DE_UINT8: {
      uint8_t *ptr = nullptr;
      RETURN_IF_NOT_OK(GetItemPtr<uint8_t>(&ptr, index));
      *o = static_cast<T>(*ptr);
      break;
    }
    case DataType::DE_UINT16: {
      uint16_t *ptr = nullptr;
      RETURN_IF_NOT_OK(GetItemPtr<uint16_t>(&ptr, index));
      *o = static_cast<T>(*ptr);
      break;
    }
    case DataType::DE_UINT32: {
      uint32_t *ptr = nullptr;
      RETURN_IF_NOT_OK(GetItemPtr<uint32_t>(&ptr, index));
      *o = static_cast<T>(*ptr);
      break;
    }
    case DataType::DE_UINT64: {
      uint64_t *ptr = nullptr;
      RETURN_IF_NOT_OK(GetItemPtr<uint64_t>(&ptr, index));
      *o = static_cast<T>(*ptr);
      break;
    }
    default:
      std::string err = "Tensor Type is not an unsigned Integer";
      RETURN_STATUS_UNEXPECTED(err);
  }
  return Status::OK();
}

template <typename T>
Status Tensor::GetSignedIntAt(T *o, const std::vector<dsize_t> &index) const {
  RETURN_UNEXPECTED_IF_NULL(o);
  if (data_ == nullptr) {
    RETURN_STATUS_UNEXPECTED("Data is not allocated yet");
  }
  if (!type_.IsLooselyCompatible<T>()) {
    std::string err = "Template type and Tensor type are not compatible";
    RETURN_STATUS_UNEXPECTED(err);
  }
  switch (type_.value()) {
    case DataType::DE_INT8: {
      int8_t *ptr = nullptr;
      RETURN_IF_NOT_OK(GetItemPtr<int8_t>(&ptr, index));
      *o = static_cast<T>(*ptr);
      break;
    }
    case DataType::DE_INT16: {
      int16_t *ptr = nullptr;
      RETURN_IF_NOT_OK(GetItemPtr<int16_t>(&ptr, index));
      *o = static_cast<T>(*ptr);
      break;
    }
    case DataType::DE_INT32: {
      int32_t *ptr = nullptr;
      RETURN_IF_NOT_OK(GetItemPtr<int32_t>(&ptr, index));
      *o = static_cast<T>(*ptr);
      break;
    }
    case DataType::DE_INT64: {
      int64_t *ptr = nullptr;
      RETURN_IF_NOT_OK(GetItemPtr<int64_t>(&ptr, index));
      *o = static_cast<T>(*ptr);
      break;
    }
    default:
      std::string err = "Tensor Type is not a signed Integer";
      RETURN_STATUS_UNEXPECTED(err);
  }
  return Status::OK();
}

template <typename T>
Status Tensor::GetFloatAt(T *o, const std::vector<dsize_t> &index) const {
  RETURN_UNEXPECTED_IF_NULL(o);
  if (data_ == nullptr) {
    RETURN_STATUS_UNEXPECTED("Data is not allocated yet");
  }
  if (!type_.IsLooselyCompatible<T>()) {
    std::string err = "Template type and Tensor type are not compatible";
    RETURN_STATUS_UNEXPECTED(err);
  }
  switch (type_.value()) {
    case DataType::DE_FLOAT16: {
      float16 *ptr = nullptr;
      RETURN_IF_NOT_OK(GetItemPtr<float16>(&ptr, index));
      *o = static_cast<T>(*ptr);
      break;
    }
    case DataType::DE_FLOAT32: {
      float *ptr = nullptr;
      RETURN_IF_NOT_OK(GetItemPtr<float>(&ptr, index));
      *o = static_cast<T>(*ptr);
      break;
    }
    case DataType::DE_FLOAT64: {
      double *ptr = nullptr;
      RETURN_IF_NOT_OK(GetItemPtr<double>(&ptr, index));
      *o = static_cast<T>(*ptr);
      break;
    }
    default:
      std::string err = "Tensor Type is not a float/double";
      RETURN_STATUS_UNEXPECTED(err);
  }
  return Status::OK();
}

Status Tensor::GetStringAt(dsize_t index, uchar **string_start, offset_t *length) const {
  CHECK_FAIL_RETURN_UNEXPECTED(type_.IsString(), "Type is not string or bytes.");
  RETURN_UNEXPECTED_IF_NULL(data_);
  RETURN_UNEXPECTED_IF_NULL(string_start);
  RETURN_UNEXPECTED_IF_NULL(length);
  auto *offset_ptr = reinterpret_cast<offset_t *>(data_);  // offsets start here
  offset_t start = offset_ptr[index];
  *string_start = data_ + start;
  *length = offset_ptr[index + 1] - start - 1;  // -1 to exclude the \0 terminator from the length
  return Status::OK();
}

Status Tensor::CopyLastDimAt(const std::shared_ptr<Tensor> &src, const std::vector<dsize_t> &index) {
  RETURN_UNEXPECTED_IF_NULL(src);
  CHECK_FAIL_RETURN_UNEXPECTED(src->type() == type_, "Source Tensor has a different type");
  CHECK_FAIL_RETURN_UNEXPECTED(index.back() == 0, "Last dim in index should be 0");

  uint8_t type_size = type_.SizeInBytes();
  size_t len = std::min(src->shape()[-1], shape_[-1]) * type_size;
  dsize_t src_flat_ind = 0, dst_flat_ind = 0;
  RETURN_IF_NOT_OK(src->shape().ToFlatIndex(index, &src_flat_ind));
  RETURN_IF_NOT_OK(shape_.ToFlatIndex(index, &dst_flat_ind));

  const unsigned char *src_addr = src->GetBuffer() + src_flat_ind * type_size;
  unsigned char *dst_addr = GetMutableBuffer() + dst_flat_ind * type_size;
  CHECK_FAIL_RETURN_UNEXPECTED(memcpy_s(dst_addr, len, src_addr, len) == EOK, "memcpy error");
  return Status::OK();
}

Status Tensor::GetSliceOption(const SliceOption &slice_option, const int32_t &slice_index,
                              SliceOption *slice_option_ptr) {
  RETURN_UNEXPECTED_IF_NULL(slice_option_ptr);
  if (slice_option.indices_.empty() && !slice_option.slice_.valid()) {
    RETURN_STATUS_UNEXPECTED("Indices and slices cannot both be empty.");
  }

  if (!slice_option.indices_.empty() && slice_option.slice_.valid()) {
    RETURN_STATUS_UNEXPECTED("Indices and slices cannot both be given.");
  }

  CHECK_FAIL_RETURN_UNEXPECTED(shape_.Size() > slice_index, "Invalid shape, should be greater than slices index.");
  // if a slice object was provided, indices should be empty. Generate indices from the slice object.
  if (slice_option.indices_.empty()) {
    // check if the slice is valid
    mindspore::dataset::Slice slice_copy = slice_option.slice_;
    slice_copy.start_ = HandleNeg(slice_option.slice_.start_, shape_[slice_index]);
    slice_copy.stop_ = HandleNeg(slice_option.slice_.stop_, shape_[slice_index]);
    slice_copy.start_ = slice_copy.start_ < 0 ? 0 : slice_copy.start_;
    slice_copy.stop_ = slice_copy.stop_ < 0 ? 0 : slice_copy.stop_;
    dsize_t max_idx = shape_[slice_index];
    slice_copy.start_ = slice_copy.start_ > max_idx ? max_idx : slice_copy.start_;
    slice_copy.stop_ = slice_copy.stop_ > max_idx ? max_idx : slice_copy.stop_;
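    // e.g. for a dimension of size 4 (a sketch): Slice(-2, 100) normalizes to start = 2,
    // stop = 4, matching Python's clamping semantics for out-of-range slice bounds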
    *slice_option_ptr = SliceOption(slice_copy);
  } else {
    // indices validation
    std::vector<dsize_t> indices_copy;
    for (int j = 0; j < slice_option.indices_.size(); j++) {
      dsize_t index = HandleNeg(slice_option.indices_[j], shape_[slice_index]);
      CHECK_FAIL_RETURN_UNEXPECTED(index < shape_[slice_index] && index >= 0,
                                   "Index " + std::to_string(index) + " is out of bounds.");
      indices_copy.emplace_back(index);
    }
    *slice_option_ptr = SliceOption(indices_copy);
  }
  return Status::OK();
}

Status Tensor::Slice(std::shared_ptr<Tensor> *out, const std::vector<SliceOption> &slice_options) {
  RETURN_UNEXPECTED_IF_NULL(out);
  std::vector<SliceOption> converted_slice_objects;

  CHECK_FAIL_RETURN_UNEXPECTED(slice_options.size() <= static_cast<size_t>(std::numeric_limits<dsize_t>::max()),
                               "The size of slice_options_ must not be more than \"INT64_MAX\".");
  for (size_t k = 0; k < slice_options.size(); k++) {
    SliceOption slice_option = slice_options[k];

    if (slice_option.all_) {
      auto slice = mindspore::dataset::Slice(shape_[static_cast<dsize_t>(k)]);
      converted_slice_objects.emplace_back(slice);
      continue;
    }

    CHECK_FAIL_RETURN_UNEXPECTED(k <= static_cast<size_t>(std::numeric_limits<int32_t>::max()),
                                 "GetSliceOption() can't function properly if there are "
                                 "more than \"INT32_MAX\" slice options");
    SliceOption slice_option_item(false);
    RETURN_IF_NOT_OK(GetSliceOption(slice_option, static_cast<int32_t>(k), &slice_option_item));
    converted_slice_objects.emplace_back(slice_option_item);
  }

  // partial slices, pass in the rest
  if (slice_options.size() != Rank()) {
    for (auto j = static_cast<dsize_t>(slice_options.size()); j < Rank(); j++) {
      mindspore::dataset::Slice slice = mindspore::dataset::Slice(0, shape_[j]);
      converted_slice_objects.emplace_back(SliceOption(slice));
    }
  }

  // determine final shape:
  TensorShape t = TensorShape({});
  dsize_t slice_len = slice_options.size();
  dsize_t slice_len_ind;
  for (int i = 0; i < shape_.Rank(); i++) {
    if (i < slice_len) {
      // if it's a slice
      if (converted_slice_objects[i].indices_.empty() && converted_slice_objects[i].slice_.step_ != 0) {
        slice_len_ind = (converted_slice_objects[i].slice_.stop_ - converted_slice_objects[i].slice_.start_) /
                        converted_slice_objects[i].slice_.step_;
        if ((converted_slice_objects[i].slice_.stop_ - converted_slice_objects[i].slice_.start_) %
              converted_slice_objects[i].slice_.step_ !=
            0) {
          slice_len_ind++;
        }
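        // the division plus remainder bump above is ceiling division; e.g. Slice(0, 5, 2)
        // keeps indices 0, 2, 4, so the output dimension is ceil(5 / 2) = 3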
        // account for slices that would return no data
        slice_len_ind = slice_len_ind < 0 ? 0 : slice_len_ind;
        t = t.AppendDim(slice_len_ind);
      } else {
        // if it's a vector of indices
        // need to introduce a way of handling indices and slices
        if (!converted_slice_objects[i].indices_.empty()) {
          t = t.AppendDim(converted_slice_objects[i].indices_.size());
        }
      }
    } else {
      // add in the rest of the dimensions
      slice_len_ind = shape_[i];
      t = t.AppendDim(slice_len_ind);
    }
  }

  std::vector<std::vector<dsize_t>> indices_vector = IndexGenerator(converted_slice_objects);

  if (indices_vector.empty()) {
    return CreateEmpty(t, type_, out);
  }
  if (type_.IsNumeric()) {
    return SliceNumeric(out, indices_vector, t);
  } else {
    return SliceString(out, indices_vector, t);
  }
}

Status Tensor::SliceNumeric(std::shared_ptr<Tensor> *out, const std::vector<std::vector<dsize_t>> &indices,
                            const TensorShape &shape) {
  RETURN_UNEXPECTED_IF_NULL(out);
  RETURN_IF_NOT_OK(CreateEmpty(shape, type_, out));

  RETURN_UNEXPECTED_IF_NULL(out);
  (*out)->GetMutableBuffer();
  dsize_t out_index = 0;
  std::vector<dsize_t> dim_length = shape_.AsVector();
  dsize_t type_size = type_.SizeInBytes();
  std::vector<dsize_t> src_start = HandleNegIndices(indices[0], dim_length);
  dsize_t src_start_index;
  RETURN_IF_NOT_OK(shape_.ToFlatIndex(src_start, &src_start_index));

  uchar *dst_addr = (*out)->data_;
  dsize_t count = 1;

  // to handle partial slices
  dsize_t current_stride = shape_.Strides()[indices[0].size() - 1];
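  // consecutive flat indices are coalesced below so that each memcpy_s copies a whole
  // contiguous run (count * current_stride elements) instead of one element at a time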
  auto indices_size = static_cast<dsize_t>(indices.size());
  for (dsize_t i = 0; i < indices_size; i++) {
    std::vector<dsize_t> cur_index = HandleNegIndices(indices[i], dim_length);
    if (i < indices_size - 1) {
      std::vector<dsize_t> next_index = HandleNegIndices(indices[i + 1], dim_length);
      dsize_t flat_idx_curr;
      dsize_t flat_idx_next;

      RETURN_IF_NOT_OK(shape_.ToFlatIndex(cur_index, &flat_idx_curr));
      RETURN_IF_NOT_OK(shape_.ToFlatIndex(next_index, &flat_idx_next));

      if (flat_idx_next == flat_idx_curr + current_stride) {
        count++;
        continue;
      }
    }

    int return_code = memcpy_s(dst_addr + out_index * type_size, (*out)->SizeInBytes(),
                               data_ + src_start_index * type_size, count * type_size * current_stride);
    CHECK_FAIL_RETURN_UNEXPECTED(return_code == EOK, "memcpy_s failed in SliceNumeric");
    out_index += count * current_stride;
    if (i < indices_size - 1) {
      src_start = HandleNegIndices(indices[i + 1], dim_length);  // next index
      RETURN_IF_NOT_OK(shape_.ToFlatIndex(src_start, &src_start_index));
    }
    count = 1;
  }
  return Status::OK();
}

Status Tensor::SliceString(std::shared_ptr<Tensor> *out, const std::vector<std::vector<dsize_t>> &indices,
                           const TensorShape &shape) {
  RETURN_UNEXPECTED_IF_NULL(out);
  std::vector<dsize_t> dim_length = shape_.AsVector();
  std::vector<std::string> strings;

  for (const std::vector<dsize_t> &index : indices) {
    std::vector<dsize_t> cur_index = HandleNegIndices(index, dim_length);
    dsize_t cur_flat_index;
    RETURN_IF_NOT_OK(shape_.ToFlatIndex(cur_index, &cur_flat_index));
    std::string_view sv;
    RETURN_IF_NOT_OK(GetItemAt(&sv, {cur_index}));
    strings.emplace_back(sv);
  }
  return CreateFromVector(strings, shape, type_, out);
}

Status Tensor::CreateFromMSTensor(const MSTensor &in, TensorPtr *out) {
  if (in.Data() == nullptr) {
    *out = nullptr;
    return Status::OK();
  }
  return Tensor::CreateFromMemory(TensorShape(in.Shape()), MSTypeToDEType(static_cast<TypeId>(in.DataType())),
                                  (const uchar *)(in.Data().get()), in.DataSize(), out);
}
}  // namespace dataset
}  // namespace mindspore