• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020-2024 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "minddata/dataset/core/tensor.h"
17 
18 #include <iomanip>
19 #include <iostream>
20 #include <fstream>
21 #include <functional>
22 #include <limits>
23 #include <memory>
24 #include <vector>
25 #include <utility>
26 
27 #ifndef ENABLE_ANDROID
28 #include "minddata/dataset/core/cv_tensor.h"
29 #endif
30 #include "minddata/dataset/core/global_context.h"
31 #ifdef ENABLE_PYTHON
32 #include "minddata/dataset/core/pybind_support.h"
33 #endif
34 #include "minddata/dataset/core/tensor_shape.h"
35 #include "minddata/dataset/core/type_id.h"
36 #include "minddata/dataset/include/dataset/constants.h"
37 #include "minddata/dataset/util/validators.h"
38 #include "utils/ms_utils.h"
39 
40 #ifdef ENABLE_PYTHON
41 namespace py = pybind11;
42 #endif
43 
44 namespace mindspore {
45 namespace dataset {
// Helper macros for printing tensor elements.
// CASE_PRINT: a switch-case body that reads one element of `native_type` at `index`
// via GetItemAt and streams it to `out`. Expects `rc` (Status), `out` (ostream) and
// `index` to be in scope at the expansion site.
#define CASE_PRINT(de_type, native_type)    \
  case de_type: {                           \
    native_type o;                          \
    rc = GetItemAt<native_type>(&o, index); \
    out << o;                               \
    break;                                  \
  }

// CASE_PRINT_HEX: same as CASE_PRINT but prints the value as a zero-padded 2-digit
// hex number (used for bool and 8-bit types), then restores decimal/space fill state.
#define CASE_PRINT_HEX(de_type, native_type)                                                    \
  case de_type: {                                                                               \
    native_type o;                                                                              \
    rc = GetItemAt<native_type>(&o, index);                                                     \
    out << std::hex << std::setw(2) << std::setfill('0') << o << std::dec << std::setfill(' '); \
    break;                                                                                      \
  }
62 
// Construct a tensor with the given shape and element type. No buffer is allocated
// here: data_ stays null until AllocateBuffer() or one of the Create* factories fills it.
Tensor::Tensor(TensorShape shape, DataType type) : shape_(std::move(shape)), type_(type), data_(nullptr) {}
64 
Tensor(Tensor && other)65 Tensor::Tensor(Tensor &&other) noexcept
66     : shape_(std::move(other.shape_)), type_(other.type_), data_(other.data_), data_end_(other.data_end_) {
67 #ifdef ENABLE_PYTHON
68   if (type_.value() == DataType::DE_PYTHON) {
69     py::gil_scoped_acquire gil_acquire;
70     python_dict_ = std::move(other.python_dict_);
71   }
72   // If other.python_array_ has value, assign it to this->python_array_
73   if (static_cast<bool>(other.python_array_)) {
74     py::gil_scoped_acquire gil_acquire;
75     python_array_ = (other.python_array_);
76   }
77 #endif
78   other.Invalidate();
79 }
80 
// Move assignment: transfers shape/type/data ownership (and yuv_shape_) from `other`,
// then invalidates `other`. Self-assignment is a no-op.
Tensor &Tensor::operator=(Tensor &&other) noexcept {
  if (&other != this) {
    shape_ = std::move(other.shape_);
    type_ = other.type_;
    data_ = other.data_;
    data_end_ = other.data_end_;
    yuv_shape_ = std::move(other.yuv_shape_);
#ifdef ENABLE_PYTHON
    // py::object members may only be touched while holding the Python GIL.
    if (type_.value() == DataType::DE_PYTHON) {
      py::gil_scoped_acquire gil_acquire;
      python_dict_ = std::move(other.python_dict_);
    }
    // If other.python_array_ has value, assign it to this->python_array_
    if (static_cast<bool>(other.python_array_)) {
      py::gil_scoped_acquire gil_acquire;
      python_array_ = (other.python_array_);
    }
#endif
    other.Invalidate();
  }
  return *this;
}
103 
CreateEmpty(const TensorShape & shape,const DataType & type,TensorPtr * out)104 Status Tensor::CreateEmpty(const TensorShape &shape, const DataType &type, TensorPtr *out) {
105   RETURN_UNEXPECTED_IF_NULL(out);
106   CHECK_FAIL_RETURN_UNEXPECTED(shape.known(), "Failed to create empty tensor, tensor shape is unknown.");
107   CHECK_FAIL_RETURN_UNEXPECTED(type != DataType::DE_UNKNOWN, "Failed to create empty tensor, data type is unknown.");
108   *out = std::make_shared<Tensor>(shape, type);
109   CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Failed to create empty tensor, allocate memory failed.");
110   // if it's a string tensor and it has no elements, Just initialize the shape and type.
111   if (!type.IsNumeric()) {
112     if (shape.NumOfElements() == 0) {
113       return Status::OK();
114     } else {
115       RETURN_STATUS_UNEXPECTED(
116         "Failed to create empty tensor, number of elements should be 0 when data type is string.");
117     }
118   }
119 
120   int64_t byte_size = (*out)->SizeInBytes();
121 
122   // Don't allocate if we have a tensor with no elements.
123   if (byte_size != 0) {
124     RETURN_IF_NOT_OK((*out)->AllocateBuffer(byte_size));
125   }
126   return Status::OK();
127 }
128 
// Create a tensor of the given shape/type and copy the implied number of bytes from
// `src` into it. A null `src` is tolerated: the tensor keeps its allocated,
// uninitialized buffer (callers fill it later).
Status Tensor::CreateFromMemory(const TensorShape &shape, const DataType &type, const uchar *src, TensorPtr *out) {
  RETURN_IF_NOT_OK(CreateEmpty(shape, type, out));
  if (src != nullptr && out != nullptr) {
    // Given the shape/type of this tensor, compute the data size and copy in the input bytes.
    int64_t byte_size = (*out)->SizeInBytes();
    if (byte_size == 0) {
      return Status::OK();
    }
    // The long message exists because the dominant failure mode here is a numpy buffer
    // from a Python UDF being freed while the C++ side still references it.
    std::string err_msg =
      "Failed to copy data into tensor. If GeneratorDataset(source=Pyfunc, ...) or map(operations=Pyfunc, ...) is "
      "used, please check whether the memory of the "
      "Numpy object returned by Pyfunc has been unexpectedly freed. Adding copy.deepcopy(numpy_object) before "
      "numpy_object returned by Pyfunc maybe solve the issue. For more details, please refer to the FAQ at "
      "https://www.mindspore.cn/docs/en/master/faq/data_processing.html.";
    // memcpy_s rejects sizes >= SECUREC_MEM_MAX_LEN, so large buffers use plain memcpy.
    if (byte_size < SECUREC_MEM_MAX_LEN) {
      int ret_code = memcpy_s((*out)->data_, byte_size, src, byte_size);
      CHECK_FAIL_RETURN_UNEXPECTED(ret_code == EOK, err_msg);
    } else {
      auto ret_code = std::memcpy((*out)->data_, src, byte_size);
      CHECK_FAIL_RETURN_UNEXPECTED(ret_code == (*out)->data_, err_msg);
    }
  }
  return Status::OK();
}
153 
CreateFromMemory(const TensorShape & shape,const DataType & type,const uchar * src,const dsize_t & length,TensorPtr * out)154 Status Tensor::CreateFromMemory(const TensorShape &shape, const DataType &type, const uchar *src, const dsize_t &length,
155                                 TensorPtr *out) {
156   RETURN_UNEXPECTED_IF_NULL(out);
157   *out = std::make_shared<Tensor>(shape, type);
158   CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory failed.");
159   if (type.IsNumeric()) {
160     dsize_t calculated_length = (*out)->SizeInBytes();
161     CHECK_FAIL_RETURN_UNEXPECTED(calculated_length == length, "Length of source data does not match the shape.");
162   } else {
163     // min_length is the length of a tensor with empty strings
164     // min_length = the number of bytes needed to store the offsets + 1 byte for each element
165     dsize_t min_length = (shape.NumOfElements() + 1) * kOffsetSize + shape.NumOfElements();
166     CHECK_FAIL_RETURN_UNEXPECTED(min_length <= length, "Length of source data does not match the shape.");
167   }
168 
169   RETURN_IF_NOT_OK((*out)->AllocateBuffer(length));
170   if (length == 0) {
171     return Status::OK();
172   }
173   RETURN_UNEXPECTED_IF_NULL(src);  // needs to be here as we may return early without using src content (empty tensors)
174   if (length < SECUREC_MEM_MAX_LEN) {
175     int ret_code = memcpy_s((*out)->data_, length, src, length);
176     CHECK_FAIL_RETURN_UNEXPECTED(ret_code == EOK, "Failed to copy data into tensor.");
177   } else {
178     auto ret_code = std::memcpy((*out)->data_, src, length);
179     CHECK_FAIL_RETURN_UNEXPECTED(ret_code == (*out)->data_, "Failed to copy data into tensor.");
180   }
181 
182   return Status::OK();
183 }
184 
185 #ifdef ENABLE_PYTHON
// Build a DE_STRING (numpy kind 'U') or DE_BYTES (kind 'S') tensor from a numpy
// string array. The array is flattened for a single iteration pass and restored to
// its original shape before returning.
// NOTE(review): assumes the caller holds the Python GIL while iterating `arr` and
// casting elements — confirm against call sites.
Status Tensor::CreateFromNpString(py::array arr, std::shared_ptr<Tensor> *out) {
  RETURN_UNEXPECTED_IF_NULL(out);
  std::vector<dsize_t> shape;
  for (size_t i = 0; i < arr.ndim(); i++) {
    shape.push_back(static_cast<dsize_t>(arr.shape()[i]));
  }
  arr.resize({arr.size()});  // flatten the py::array so we can iterate once
  std::vector<std::string> strings;
  strings.reserve(arr.size());
  (void)std::for_each(arr.begin(), arr.end(),
                      [&strings](const auto &s) { strings.emplace_back(py::cast<py::str>(s)); });
  arr.resize(shape);  // resize arr back to the original shape

  if (arr.dtype().kind() == 'U') {  // numpy dtype type is "U"
    RETURN_IF_NOT_OK(CreateFromVector(strings, TensorShape{shape}, DataType(DataType::DE_STRING), out));
  } else {  // numpy dtype type is "S"
    RETURN_IF_NOT_OK(CreateFromVector(strings, TensorShape{shape}, DataType(DataType::DE_BYTES), out));
  }

  return Status::OK();
}
207 
// Create a tensor from a numpy array. String/bytes arrays are delegated to
// CreateFromNpString. Contiguous numeric arrays are zero-copy: the tensor borrows
// the numpy buffer and pins a reference in python_array_. Non-contiguous (strided)
// arrays are compacted into a freshly allocated tensor-owned buffer instead.
Status Tensor::CreateFromNpArray(py::array arr, std::shared_ptr<Tensor> *out) {
  RETURN_UNEXPECTED_IF_NULL(out);
  DataType type = DataType::FromNpArray(arr);
  CHECK_FAIL_RETURN_UNEXPECTED(type != DataType::DE_UNKNOWN,
                               "Failed to create tensor from numpy array, data type is unknown.");

  if (type.IsString()) {
    return CreateFromNpString(arr, out);
  }

  std::vector<dsize_t> shape;
  std::vector<dsize_t> strides;
  // check if strides are contiguous
  bool is_strided = false;
  dsize_t count = arr.size();
  for (dsize_t i = 0; i < arr.ndim(); i++) {
    shape.push_back(static_cast<dsize_t>(arr.shape()[i]));
    strides.push_back(static_cast<dsize_t>(arr.strides()[i]));
    // in case of empty array num_items=0
    if (count != 0 && shape.size() > i && shape[i] != 0) {
      // After dividing out dims seen so far, `count` is the product of the remaining
      // (inner) dims; a C-contiguous dim must satisfy stride == itemsize * count.
      count /= shape[i];
      if (strides[i] != arr.itemsize() * count) {
        is_strided = true;
      }
    }
  }

  if (is_strided) {
    // Non-contiguous input: copy element by element into a tensor-owned buffer.
    unsigned char *data = static_cast<unsigned char *>(arr.request().ptr);
    RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape(shape), type, out));
    RETURN_IF_NOT_OK(CopyStridedArray((*out)->data_, data, shape, strides, (*out)->type_.SizeInBytes()));
  } else {
#ifdef ENABLE_PYTHON
    // here we create empty tensor and use this->python_array_ point to data which is np.ndarray
    *out = std::make_shared<Tensor>(TensorShape(shape), type);
    {
      py::gil_scoped_acquire gil_acquire;
      (*out)->python_array_ = arr;
    }
    unsigned char *data = static_cast<unsigned char *>((*out)->python_array_.request().ptr);
    int64_t byte_size = (*out)->SizeInBytes();
    if (byte_size == 0) {
      return Status::OK();
    }
    (*out)->data_ = data;
    (*out)->data_end_ = data + byte_size;
#else
    // NOTE(review): this branch is dead code — the whole function is only compiled when
    // ENABLE_PYTHON is defined (see the guard above this function), and it references
    // `data`, which is not declared in this scope. Confirm before enabling a non-Python
    // build of this translation unit.
    RETURN_IF_NOT_OK(Tensor::CreateFromMemory(TensorShape(shape), type, data, out));
#endif
  }
  return Status::OK();
}
260 
CreateFromPythonObject(py::object obj,std::shared_ptr<Tensor> * out)261 Status Tensor::CreateFromPythonObject(py::object obj, std::shared_ptr<Tensor> *out) {
262   RETURN_UNEXPECTED_IF_NULL(out);
263   std::vector<dsize_t> shape{};
264   DataType type = DataType(DataType::DE_PYTHON);
265   *out = std::make_shared<Tensor>(TensorShape({0}), type);
266   {
267     py::gil_scoped_acquire gil_acquire;
268     (*out)->python_dict_ = obj;
269 
270     // serialize python object to bytes which used by dataset independent process mode
271     (*out)->python_dict_as_str_ = py::str(py::module::import("pickle").attr("dumps")((*out)->python_dict_));
272     (*out)->data_ = reinterpret_cast<unsigned char *>((*out)->python_dict_as_str_.data());
273     (*out)->data_end_ = (*out)->data_ + (*out)->python_dict_as_str_.length();
274   }
275   CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Failed to create a tensor for python object.");
276   return Status::OK();
277 }
278 
279 #endif
280 
281 #ifndef ENABLE_ANDROID
CreateFromByteList(const dataengine::BytesList & bytes_list,const TensorShape & shape,TensorPtr * out)282 Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const TensorShape &shape, TensorPtr *out) {
283   RETURN_UNEXPECTED_IF_NULL(out);
284   *out = std::make_shared<Tensor>(TensorShape({static_cast<dsize_t>(bytes_list.value_size())}),
285                                   DataType(DataType::DE_STRING));
286   CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory failed.");
287   // total bytes needed = offset array + strings
288   // offset array needs to store one offset var per element + 1 extra to get the length of the last string.
289   // strings will be null-terminated --> need 1 extra byte per element
290   dsize_t num_bytes = (kOffsetSize) * (*out)->shape_.NumOfElements() + kOffsetSize + bytes_list.ByteSizeLong();
291 
292   (*out)->data_ = GetAllocator()->allocate(num_bytes);
293 
294   auto offset_arr = reinterpret_cast<offset_t *>((*out)->data_);
295   uchar *buf = (*out)->GetStringsBuffer();
296 
297   offset_t offset = buf - (*out)->data_;  // the first string will start here
298   int32_t i = 0;
299   for (; i < bytes_list.value_size(); i++) {
300     const std::string &str = bytes_list.value(i);
301     //  insert the start index of the string.
302     offset_arr[i] = offset;
303     // total bytes are reduced by kOffsetSize
304     num_bytes -= kOffsetSize;
305     // insert actual string
306     int ret_code = memcpy_s((*out)->data_ + offset, num_bytes, common::SafeCStr(str), str.length() + 1);
307     CHECK_FAIL_RETURN_UNEXPECTED(ret_code == EOK, "Cannot copy string into Tensor");
308     //  next string will be stored right after the current one.
309     offset = offset + str.length() + 1;
310     // total bytes are reduced by the length of the string
311     num_bytes -= str.length() + 1;
312   }
313   // store one more offset value so we can get the length of the last string
314   // length[last_element] = offset_arr[last_element + 1] - offset_arr[last_element]
315   offset_arr[i] = offset;
316 
317   (*out)->data_end_ = (*out)->data_ + offset_arr[i];
318 
319   MS_ASSERT(num_bytes == 0);
320   RETURN_IF_NOT_OK((*out)->Reshape(shape));
321   return Status::OK();
322 }
323 #endif
324 
CreateFromFile(const std::string & path,std::shared_ptr<Tensor> * out)325 Status Tensor::CreateFromFile(const std::string &path, std::shared_ptr<Tensor> *out) {
326   RETURN_UNEXPECTED_IF_NULL(out);
327   Path file(path);
328   if (file.IsDirectory()) {
329     RETURN_STATUS_UNEXPECTED("Invalid file found: " + path + ", should be file, but got directory.");
330   }
331   std::ifstream fs;
332   fs.open(path, std::ios::binary | std::ios::in);
333   CHECK_FAIL_RETURN_UNEXPECTED(!fs.fail(), "Failed to open file: " + path);
334   int64_t num_bytes = fs.seekg(0, std::ios::end).tellg();
335   if (num_bytes >= kDeMaxDim) {
336     fs.close();
337     RETURN_STATUS_UNEXPECTED("Invalid file to allocate tensor memory, check path: " + path);
338   }
339   if (!fs.seekg(0, std::ios::beg).good()) {
340     fs.close();
341     RETURN_STATUS_UNEXPECTED("Failed to find size of file, check path: " + path);
342   }
343   auto s = Tensor::CreateEmpty(TensorShape{num_bytes}, DataType(DataType::DE_UINT8), out);
344   if (s != Status::OK()) {
345     fs.close();
346     return s;
347   }
348   int64_t written_bytes = fs.read(reinterpret_cast<char *>((*out)->GetMutableBuffer()), num_bytes).gcount();
349   if (!(written_bytes == num_bytes && fs.good())) {
350     fs.close();
351     RETURN_STATUS_UNEXPECTED("Error in writing to tensor, check path: " + path);
352   }
353   fs.close();
354   return Status::OK();
355 }
356 
357 #ifndef ENABLE_ANDROID
CreateFromByteList(const dataengine::BytesList & bytes_list,const TensorShape & shape,const DataType & type,dsize_t pad_size,TensorPtr * out)358 Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const TensorShape &shape,
359                                   const DataType &type, dsize_t pad_size, TensorPtr *out) {
360   RETURN_UNEXPECTED_IF_NULL(out);
361   RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, type, out));
362 
363   RETURN_UNEXPECTED_IF_NULL(out);
364   unsigned char *current_tensor_addr = (*out)->GetMutableBuffer();
365   int64_t tensor_bytes_remaining = bytes_list.value_size() * pad_size;
366 
367   for (int i = 0; i < bytes_list.value_size(); i++) {
368     // read string data into tensor
369     const std::string &current_element = bytes_list.value(i);
370     int return_code =
371       memcpy_s(current_tensor_addr, tensor_bytes_remaining, common::SafeCStr(current_element), current_element.size());
372 
373     CHECK_FAIL_RETURN_UNEXPECTED(return_code == EOK, "memcpy_s failed when reading bytesList element into Tensor");
374 
375     current_tensor_addr += current_element.size();
376     tensor_bytes_remaining -= current_element.size();
377 
378     // pad
379     int64_t chars_to_pad = pad_size - current_element.size();
380     return_code = memset_s(current_tensor_addr, tensor_bytes_remaining, static_cast<int>(' '), chars_to_pad);
381     CHECK_FAIL_RETURN_UNEXPECTED(return_code == EOK, "memcpy_s failed when padding Tensor");
382 
383     current_tensor_addr += chars_to_pad;
384     tensor_bytes_remaining -= chars_to_pad;
385   }
386 
387   return Status::OK();
388 }
389 #endif
390 
// Memcpy the given strided array's used part to consecutive memory
// Consider a 3-d array
// A[(i * shape[1] + j) * shape[2] + k] = B[i][j][k] = C[i * strides[0] + j * strides[1] + k * strides[2]]
// Here we convert array C to array A, by memcpy index by index (Note that not all elements in C is copied)
// @param dst - destination buffer, must hold NumOfElements * type_size contiguous bytes
// @param src - source buffer addressed via the byte strides
// @param strides - byte strides per dimension (already include the element size)
Status Tensor::CopyStridedArray(unsigned char *dst, unsigned char *src, std::vector<dsize_t> shape,
                                std::vector<dsize_t> strides, uint8_t type_size) {
  RETURN_UNEXPECTED_IF_NULL(dst);
  RETURN_UNEXPECTED_IF_NULL(src);
  // Total element count = product of all dims.
  dsize_t size = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<>());
  for (dsize_t i = 0; i < size; ++i) {
    dsize_t offset = 0;
    dsize_t count = i;
    // Decompose flat index i into per-dimension indices, innermost dimension first.
    for (size_t j = 0; j < shape.size(); ++j) {
      // convert 1d array's index to 3d array's index (A -> B)
      CHECK_FAIL_RETURN_UNEXPECTED(shape[shape.size() - 1 - j] != 0, "Invalid data, shape can't be zero.");
      dsize_t idx = count % shape[shape.size() - 1 - j];
      count /= shape[shape.size() - 1 - j];
      // calculate the raw data offset based on strides (B -> C)
      offset += idx * strides[shape.size() - 1 - j];
      // once count = 0, the following idxes are all zero, skip them
      if (count == 0) {
        break;
      }
    }
    // strides already consider byte size of the data type, but dst doesn't.
    // dst[i] = dst + i * type_size = src + offset
    int ret_code = memcpy_s(dst + i * type_size, type_size, src + offset, type_size);
    if (ret_code != EOK) {
      RETURN_STATUS_UNEXPECTED("Failed to copy data into Tensor.");
    }
  }
  return Status::OK();
}
424 
// Name: Destructor
// Description: Destructor
// Frees the data buffer unless it is borrowed from a numpy array (python_array_),
// and releases any held Python references while holding the GIL.
Tensor::~Tensor() {
#ifdef ENABLE_PYTHON
  if (!static_cast<bool>(python_array_)) {  // the data is not np.ndarray from python layer
#endif
    // DE_PYTHON tensors point data_ into python_dict_as_str_, so nothing to free here.
    if (!type().IsPython()) {
      if (data_ != nullptr) {
        if (GetAllocator() != nullptr) {
          GetAllocator()->deallocate(data_);
          data_ = nullptr;
          data_end_ = nullptr;
        } else {
          // If we didn't have an allocator, but data_ is not null then it must
          // be a stand-alone tensor that used malloc directly.
          free(data_);
          data_ = nullptr;
          data_end_ = nullptr;
        }
      }
    }
#ifdef ENABLE_PYTHON
  } else {
    // release the data from python layer
    py::gil_scoped_acquire gil_acquire;
    python_array_ = py::none();  // let borrowed python ndarray ref - 1
  }
#endif
#ifdef ENABLE_PYTHON
  try {
    // The default destructor will not acquire the Python GIL when it destructs
    // the class members, so we need to handle py::object manually.
    // Skip entirely if the interpreter has already shut down.
    if (Py_IsInitialized() > 0) {
      if (static_cast<bool>(python_dict_)) {
        // Acquire Python GIL
        py::gil_scoped_acquire gil_acquire;
        // We need to reduce the reference count of the py::object to which python_dict_
        // refers by 1, then break that reference relationship, otherwise the default
        // destructor will destruct that py::object again while recycling class member
        // python_dict_. A simple assignment to None satisfies all of the above.
        python_dict_ = py::none();
      }
    }
  } catch (const py::error_already_set &e) {
    // ignore exceptions as everything could be shutting down at this point
  }
#endif
}
473 
// Equality: tensors holding a Python object compare via their py::object; all other
// tensors must match in shape, type, null-ness of data_ and raw buffer contents.
bool Tensor::operator==(const Tensor &rhs) const {
#ifdef ENABLE_PYTHON
  if (type_.value() == DataType::DE_PYTHON) {  // we are holding a python object
    // NOTE(review): `python_dict_ == rhs.python_dict_` uses pybind11's object
    // comparison — confirm whether Python `==` or identity semantics are intended here.
    if (static_cast<bool>(python_dict_) && static_cast<bool>(rhs.python_dict_) && python_dict_ == rhs.python_dict_) {
      return true;
    }
    return false;
  }
#endif
  // 1. different shape 2. different type 3. one data_ is nullptr and the other is not
  if (shape_ != rhs.shape() || type_ != rhs.type_ || (data_ == nullptr && rhs.data_ != nullptr) ||
      (data_ != nullptr && rhs.data_ == nullptr)) {
    return false;
  }
  // Two unallocated tensors with equal shape/type compare equal.
  if (data_ == nullptr && rhs.data_ == nullptr) {
    return true;
  }
  // use mem compare to compare the two data, size are already verified
  return memcmp(data_, rhs.data_, SizeInBytes()) == 0;
}
494 
// Name: PrintItemAt()
// Description: A function that print the value as specified by its index
// Bool and 8-bit integers are printed as 2-digit hex, strings are quoted, and any
// type without a case prints "?". A failed GetItemAt appends the error to the stream.
void Tensor::PrintItemAt(const std::vector<dsize_t> &index, std::ostream &out) const {
  Status rc;
  MS_ASSERT(data_);

  switch (type_.value()) {
    CASE_PRINT_HEX(DataType::DE_BOOL, bool)

    CASE_PRINT_HEX(DataType::DE_INT8, int8_t)

    CASE_PRINT_HEX(DataType::DE_UINT8, uint8_t)

    CASE_PRINT(DataType::DE_INT16, int16_t)

    CASE_PRINT(DataType::DE_UINT16, uint16_t)

    CASE_PRINT(DataType::DE_INT32, int32_t)

    CASE_PRINT(DataType::DE_UINT32, uint32_t)

    CASE_PRINT(DataType::DE_INT64, int64_t)

    CASE_PRINT(DataType::DE_UINT64, uint64_t)

    CASE_PRINT(DataType::DE_FLOAT16, float16)

    CASE_PRINT(DataType::DE_FLOAT32, float)

    CASE_PRINT(DataType::DE_FLOAT64, double)

    case DataType::DE_STRING: {
      std::string_view o{""};
      rc = GetItemAt(&o, index);
      out << "\"" << o << "\"";
      break;
    }
    default: {
      // Unknown/unsupported element type for printing.
      out << "?";
      break;
    }
  }
  if (rc.IsError()) {
    out << rc.ToString();
  }
}
541 
542 // Name: PrintRecursive()
543 // Description: A function that prints Tensor recursively, first called by print
PrintRecursive(std::ostream & out,int32_t cur_dim,const std::vector<dsize_t> & cur_index) const544 void Tensor::PrintRecursive(std::ostream &out, int32_t cur_dim, const std::vector<dsize_t> &cur_index) const {
545   if (cur_index.size() == shape_.Rank()) {
546     PrintItemAt(cur_index, out);
547   } else {
548     out << "[";
549     for (dsize_t i = 0; i < shape_[cur_dim]; i++) {
550       std::vector<dsize_t> new_index = cur_index;
551       new_index.push_back(i);
552       PrintRecursive(out, cur_dim + 1, new_index);
553       if (i < shape_[cur_dim] - 1) {
554         out << ",";
555       }
556     }
557     out << "]";
558   }
559 }
560 
// Name: Print()
// Description: A function that prints info about the tensor
// Emits "Tensor (shape: ..., Type: ...)" followed by the element data, the string
// form of a held Python dict, or "[Data area is null]" when nothing is allocated.
void Tensor::Print(std::ostream &out) const {
  out << "Tensor (shape: ";
  out << shape_;
  out << ", Type: " << type_ << ")\n";
  if (data_) {
    PrintRecursive(out, 0, std::vector<dsize_t>{});
#ifdef ENABLE_PYTHON
  } else if (static_cast<bool>(python_dict_)) {
    // Stringify the Python object while holding the GIL; stream it afterwards.
    std::string s;
    {
      py::gil_scoped_acquire gil_acquire;
      s = py::str(python_dict_);
    }
    out << s;
#endif
  } else {
    out << "[Data area is null]";
  }
}
582 
PrintData(std::ostream & out) const583 void Tensor::PrintData(std::ostream &out) const {
584   if (data_) {
585     PrintRecursive(out, 0, std::vector<dsize_t>{});
586   }
587 }
588 
// Lazily allocate the data buffer of `length` bytes; a no-op when data_ is already set.
// NOTE(review): if data_ was previously allocated with a different length, this keeps
// the old buffer and data_end_ silently — callers must not rely on it to resize.
Status Tensor::AllocateBuffer(const dsize_t &length) {
  RETURN_UNEXPECTED_IF_NULL(GetAllocator());
  if (data_ == nullptr) {
    data_ = GetAllocator()->allocate(length);
    CHECK_FAIL_RETURN_UNEXPECTED(data_ != nullptr, "Failed to allocate memory for tensor.");
    data_end_ = data_ + length;
  }
  return Status::OK();
}
598 
Reshape(const TensorShape & shape)599 Status Tensor::Reshape(const TensorShape &shape) {
600   if (shape.NumOfElements() == shape_.NumOfElements()) {
601     shape_ = shape;
602     return Status::OK();
603   } else {
604     std::string err = "Cannot reshape, Number of elements do not match";
605     RETURN_STATUS_UNEXPECTED(err);
606   }
607 }
608 
Invalidate()609 void Tensor::Invalidate() {
610   shape_ = TensorShape::CreateUnknownRankShape();
611   type_ = DataType(DataType::DE_UNKNOWN);
612   data_ = nullptr;
613   data_end_ = nullptr;
614 #ifdef ENABLE_PYTHON
615   if (type_.value() == DataType::DE_PYTHON) {
616     py::gil_scoped_acquire gil_acquire;
617     python_dict_ = py::none();
618   }
619   if (static_cast<bool>(python_array_)) {
620     py::gil_scoped_acquire gil_acquire;
621     python_array_ = py::none();  // let borrowed python ndarray ref - 1
622   }
623 #endif
624 }
625 
626 template <typename T>
GetItemPtr(T ** ptr,const std::vector<dsize_t> & index) const627 Status Tensor::GetItemPtr(T **ptr, const std::vector<dsize_t> &index) const {
628   RETURN_UNEXPECTED_IF_NULL(ptr);
629   if (type_.IsCompatible<T>()) {
630     if (data_ == nullptr) {
631       std::string err = "Data is not allocated yet";
632       RETURN_STATUS_UNEXPECTED(err);
633     }
634     dsize_t flat_idx;
635     RETURN_IF_NOT_OK(shape_.ToFlatIndex(index, &flat_idx));
636     *ptr = reinterpret_cast<T *>(data_ + flat_idx * type_.SizeInBytes());
637     RETURN_UNEXPECTED_IF_NULL(*ptr);
638 
639     return Status::OK();
640   } else {
641     std::string err = "data type not compatible";
642     RETURN_STATUS_UNEXPECTED(err);
643   }
644 }
645 
GetItemPtr(uchar ** ptr,const std::vector<dsize_t> & index,offset_t * length) const646 Status Tensor::GetItemPtr(uchar **ptr, const std::vector<dsize_t> &index, offset_t *length) const {
647   RETURN_UNEXPECTED_IF_NULL(ptr);
648   RETURN_UNEXPECTED_IF_NULL(length);
649   if (type_.IsString()) {
650     if (data_ == nullptr) {
651       std::string err = "Data is not allocated yet";
652       RETURN_STATUS_UNEXPECTED(err);
653     }
654     dsize_t flat_idx;
655     RETURN_IF_NOT_OK(shape_.ToFlatIndex(index, &flat_idx));
656     offset_t length_temp = 0;
657     RETURN_IF_NOT_OK(GetStringAt(flat_idx, ptr, &length_temp));
658     *length = length_temp;
659     return Status::OK();
660   } else {
661     std::string err = "data type not compatible";
662     RETURN_STATUS_UNEXPECTED(err);
663   }
664 }
665 
// Compute the address of the sub-tensor selected by a (possibly partial) index `ind`,
// and report the shape of the remaining (unindexed) dimensions in `remaining`.
// Not supported for string/bytes tensors, whose storage is not uniformly strided.
Status Tensor::StartAddrOfIndex(std::vector<dsize_t> ind, uchar **start_addr_of_index, TensorShape *remaining) {
  RETURN_UNEXPECTED_IF_NULL(start_addr_of_index);
  RETURN_UNEXPECTED_IF_NULL(remaining);
  if (type().IsString()) {
    RETURN_STATUS_UNEXPECTED("StartAddrOfIndex does not support string and bytes tensors yet.");
  }

  dsize_t flat_ind;
  std::vector<dsize_t> t_shape = shape().AsVector();
  // The trailing dims not covered by `ind` form the remaining shape.
  std::vector<dsize_t> r(t_shape.begin() + ind.size(), t_shape.end());
  *remaining = TensorShape(r);
  ind.resize(this->Rank(), 0);  //  same as -> while (ind.size() < this->Rank()) ind.push_back(0);

  RETURN_IF_NOT_OK(shape_.ToFlatIndex(ind, &flat_ind));
  // check if GetBuffer() returns null, we should flag this as an error, this sanity check will only
  // be true is the tensor failed to allocate memory.
  if (GetMutableBuffer() == nullptr) {
    RETURN_STATUS_UNEXPECTED("Invalid GetBuffer in Tensor, got nullptr");
  }
  *start_addr_of_index = GetMutableBuffer() + flat_ind * this->type().SizeInBytes();
  return Status::OK();
}
688 
// Copy `tensor` into this tensor at index `ind`. With partial_insert, `ind` is a
// single flat offset and the inserted elements may span axis boundaries; otherwise
// the inserted tensor must exactly fill the remaining dimensions at `ind`.
// All validation failures are accumulated into one error message before returning.
Status Tensor::InsertTensor(const std::vector<dsize_t> &ind, const std::shared_ptr<Tensor> &tensor,
                            const bool partial_insert) {
  RETURN_UNEXPECTED_IF_NULL(tensor);
  std::string err_msg;
  if (partial_insert) {
    err_msg += (ind.size() != 1)
                 ? "[Tensor] only supports 1D insertion of elements not along the full length of the axis\n"
                 : "";
    err_msg +=
      (ind.at(0) + tensor->shape().NumOfElements() > shape().NumOfElements()) ? "[Tensor] incorrect index\n" : "";
  } else {
    // Full insert: index rank plus inserted-tensor rank must equal this tensor's rank.
    err_msg += (ind.size() + tensor->Rank() != Rank()) ? "[Tensor] incorrect index\n" : "";
  }
  err_msg += (type().IsString()) ? "[Tensor] Cannot insert into a tensor of type string or bytes\n" : "";
  err_msg += (!shape().known() || !tensor->shape().known()) ? "[Tensor] unknown shape\n" : "";

  // Element sizes must match so the raw byte copy below is well-defined.
  err_msg += tensor->type().SizeInBytes() != type().SizeInBytes() ? "[Tensor] incorrect datatype\n" : "";
  uchar *start_addr_of_ind = nullptr;
  if (partial_insert) {
    TensorShape remaining_shape = tensor->shape();
    err_msg +=
      (!StartAddrOfIndex(ind, &start_addr_of_ind, &remaining_shape).IsOk()) ? "[Tensor] incorrect index\n" : "";
  } else {
    TensorShape remaining_shape = TensorShape::CreateUnknownRankShape();
    err_msg +=
      (!StartAddrOfIndex(ind, &start_addr_of_ind, &remaining_shape).IsOk()) ? "[Tensor] incorrect index\n" : "";
    err_msg += !(remaining_shape == tensor->shape()) ? "[Tensor] memory error\n" : "";
  }

  if (!err_msg.empty()) {
    MS_LOG(DEBUG) << "Insert tensor message: " << err_msg;
    RETURN_STATUS_UNEXPECTED(err_msg);
  } else {
    if (start_addr_of_ind != nullptr) {
      int ret_code =
        memcpy_s(start_addr_of_ind, tensor->SizeInBytes(), tensor->GetMutableBuffer(), tensor->SizeInBytes());
      if (ret_code == EOK) {
        return Status::OK();
      } else {
        err_msg += "[Tensor] error in memcpy_s when inserting tensor\n";
        MS_LOG(DEBUG) << "Tensor message: " << err_msg;
        RETURN_STATUS_UNEXPECTED(err_msg);
      }
    } else {
      RETURN_STATUS_UNEXPECTED("Failed to create memory for Tensor.");
    }
  }
}
737 
ExpandDim(const dsize_t & axis)738 Status Tensor::ExpandDim(const dsize_t &axis) {
739   if (axis > Rank()) {
740     std::string err = "Axis is out of bound";
741     RETURN_STATUS_UNEXPECTED(err);
742   }
743   if (axis == Rank()) {
744     shape_ = shape_.AppendDim(1);
745   } else {
746     shape_ = shape_.InsertDim(axis, 1);
747   }
748   return Status::OK();
749 }
750 
Strides() const751 std::vector<dsize_t> Tensor::Strides() const {
752   std::vector<dsize_t> strides = shape_.Strides();
753   uint8_t size = type_.SizeInBytes();
754   (void)std::transform(strides.begin(), strides.end(), strides.begin(), [&size](const auto &c) { return c * size; });
755   return strides;
756 }
757 
758 #ifdef ENABLE_PYTHON
// Fill a pybind11 buffer_info describing `t`'s raw memory, so Python can view the
// tensor through the buffer protocol without copying. Only numeric tensors are
// supported: string/bytes tensors do not have a flat fixed-stride layout.
Status Tensor::GetBufferInfo(Tensor *t, py::buffer_info *out) {
  RETURN_UNEXPECTED_IF_NULL(t);
  RETURN_UNEXPECTED_IF_NULL(out);
  CHECK_FAIL_RETURN_UNEXPECTED(t->type().IsNumeric(), "Cannot use GetBufferInfo on tensor of strings or bytes.");

  std::string format_desc = t->type().GetPybindFormat();
  if (format_desc.empty()) {
    RETURN_STATUS_UNEXPECTED("Cannot convert DE type to pybind format");
  }
  *out = py::buffer_info(t->GetMutableBuffer(),   /* Pointer to buffer */
                         t->type().SizeInBytes(), /* Size of one scalar */
                         format_desc,             /* Python struct-style format descriptor */
                         t->Rank(),               /* Number of dimensions */
                         t->shape().AsVector(),   /* Buffer dimensions */
                         t->Strides());           /* Strides (in bytes) for each index */
  return Status::OK();
}
776 #endif
777 
// Serialize this tensor to JSON: "shape" (dims), "type" (type name string) and
// "data" (flat array). Numeric types delegate to to_json_convert<T>; string/bytes
// tensors emit their elements as a JSON string array. Unknown types are an error.
Status Tensor::to_json(nlohmann::json *out_json) {
  nlohmann::json args;
  args["shape"] = shape_.AsVector();
  args["type"] = type_.ToString();
  if (type_ == DataType::DE_BOOL) {
    RETURN_IF_NOT_OK(to_json_convert<bool>(&args));
  } else if (type_ == DataType::DE_INT8) {
    RETURN_IF_NOT_OK(to_json_convert<int8_t>(&args));
  } else if (type_ == DataType::DE_INT16) {
    RETURN_IF_NOT_OK(to_json_convert<int16_t>(&args));
  } else if (type_ == DataType::DE_INT32) {
    RETURN_IF_NOT_OK(to_json_convert<int32_t>(&args));
  } else if (type_ == DataType::DE_INT64) {
    RETURN_IF_NOT_OK(to_json_convert<int64_t>(&args));
  } else if (type_ == DataType::DE_UINT8) {
    RETURN_IF_NOT_OK(to_json_convert<uint8_t>(&args));
  } else if (type_ == DataType::DE_UINT16) {
    RETURN_IF_NOT_OK(to_json_convert<uint16_t>(&args));
  } else if (type_ == DataType::DE_UINT32) {
    RETURN_IF_NOT_OK(to_json_convert<uint32_t>(&args));
  } else if (type_ == DataType::DE_UINT64) {
    RETURN_IF_NOT_OK(to_json_convert<uint64_t>(&args));
  } else if (type_ == DataType::DE_FLOAT32) {
    RETURN_IF_NOT_OK(to_json_convert<float>(&args));
  } else if (type_ == DataType::DE_FLOAT64) {
    RETURN_IF_NOT_OK(to_json_convert<double>(&args));
  } else if (type_.IsString()) {
    // String/bytes tensors: collect every element (as string_view) into a string array.
    std::vector<std::string> data_out;
    for (auto it = this->begin<std::string_view>(); it != this->end<std::string_view>(); ++it) {
      data_out.emplace_back(*it);
    }
    args["data"] = data_out;
  } else {
    // NOTE: DE_FLOAT16 is not handled here, unlike in GetDataAsNumpy — TODO confirm
    // whether float16 tensors are expected to reach to_json.
    return Status(StatusCode::kMDUnexpectedError, "Type is not supported for tensor");
  }
  *out_json = args;
  return Status::OK();
}
816 
817 template <typename T>
to_json_convert(nlohmann::json * args)818 Status Tensor::to_json_convert(nlohmann::json *args) {
819   std::vector<T> data_out;
820   for (auto it = this->begin<T>(); it != this->end<T>(); it++) {
821     data_out.emplace_back(*it);
822   }
823   (*args)["data"] = data_out;
824   return Status::OK();
825 }
826 
// Deserialize a tensor from JSON produced by to_json. Requires "shape", "type"
// and "data" keys; dispatches on the type-name string to the typed
// from_json_convert helper, with string/bytes handled by the DataType overload.
Status Tensor::from_json(nlohmann::json op_params, std::shared_ptr<Tensor> *tensor) {
  RETURN_IF_NOT_OK(ValidateParamInJson(op_params, "shape", "Tensor"));
  RETURN_IF_NOT_OK(ValidateParamInJson(op_params, "type", "Tensor"));
  RETURN_IF_NOT_OK(ValidateParamInJson(op_params, "data", "Tensor"));
  std::string type = op_params["type"];
  std::vector<dsize_t> list = op_params["shape"];
  TensorShape shape = TensorShape(list);
  if (type == "bool") {
    RETURN_IF_NOT_OK(from_json_convert<bool>(op_params["data"], shape, tensor));
  } else if (type == "int8") {
    RETURN_IF_NOT_OK(from_json_convert<int8_t>(op_params["data"], shape, tensor));
  } else if (type == "int16") {
    RETURN_IF_NOT_OK(from_json_convert<int16_t>(op_params["data"], shape, tensor));
  } else if (type == "int32") {
    RETURN_IF_NOT_OK(from_json_convert<int32_t>(op_params["data"], shape, tensor));
  } else if (type == "int64") {
    RETURN_IF_NOT_OK(from_json_convert<int64_t>(op_params["data"], shape, tensor));
  } else if (type == "uint8") {
    RETURN_IF_NOT_OK(from_json_convert<uint8_t>(op_params["data"], shape, tensor));
  } else if (type == "uint16") {
    RETURN_IF_NOT_OK(from_json_convert<uint16_t>(op_params["data"], shape, tensor));
  } else if (type == "uint32") {
    RETURN_IF_NOT_OK(from_json_convert<uint32_t>(op_params["data"], shape, tensor));
  } else if (type == "uint64") {
    RETURN_IF_NOT_OK(from_json_convert<uint64_t>(op_params["data"], shape, tensor));
  } else if (type == "float32") {
    RETURN_IF_NOT_OK(from_json_convert<float>(op_params["data"], shape, tensor));
  } else if (type == "float64") {
    RETURN_IF_NOT_OK(from_json_convert<double>(op_params["data"], shape, tensor));
  } else if (type == "string") {
    RETURN_IF_NOT_OK(from_json_convert(op_params["data"], shape, DataType(DataType::DE_STRING), tensor));
  } else if (type == "bytes") {
    RETURN_IF_NOT_OK(from_json_convert(op_params["data"], shape, DataType(DataType::DE_BYTES), tensor));
  } else {
    return Status(StatusCode::kMDUnexpectedError, "Type is not supported for tensor");
  }
  return Status::OK();
}
865 
866 template <typename T>
from_json_convert(const nlohmann::json & json_data,const TensorShape & shape,std::shared_ptr<Tensor> * tensor)867 Status Tensor::from_json_convert(const nlohmann::json &json_data, const TensorShape &shape,
868                                  std::shared_ptr<Tensor> *tensor) {
869   std::vector<T> data = json_data;
870   RETURN_IF_NOT_OK(CreateFromVector(data, shape, tensor));
871   return Status::OK();
872 }
873 
from_json_convert(const nlohmann::json & json_data,const TensorShape & shape,const DataType & type,std::shared_ptr<Tensor> * tensor)874 Status Tensor::from_json_convert(const nlohmann::json &json_data, const TensorShape &shape, const DataType &type,
875                                  std::shared_ptr<Tensor> *tensor) {
876   std::vector<std::string> data = json_data;
877   RETURN_IF_NOT_OK(CreateFromVector(data, shape, type, tensor));
878   return Status::OK();
879 }
880 
// Read the element at `index` into *o, converting from the tensor's stored type
// to T via static_cast. Dispatches on the type *category* (unsigned/signed/float/
// bool) so any loosely-compatible T can receive the value.
template <typename T>
Status Tensor::GetItemAt(T *o, const std::vector<dsize_t> &index) const {
  RETURN_UNEXPECTED_IF_NULL(o);
  if (data_ == nullptr) {
    RETURN_STATUS_UNEXPECTED("Data is not allocated yet");
  }
  if (!type_.IsLooselyCompatible<T>()) {
    std::string err = "Template type and Tensor type are not compatible";
    RETURN_STATUS_UNEXPECTED(err);
  }
  if (type_.IsUnsignedInt()) {
    RETURN_IF_NOT_OK(GetUnsignedIntAt<T>(o, index));
  } else if (type_.IsSignedInt()) {
    RETURN_IF_NOT_OK(GetSignedIntAt<T>(o, index));
  } else if (type_.IsFloat()) {
    RETURN_IF_NOT_OK(GetFloatAt<T>(o, index));
  } else if (type_.IsBool()) {
    // bool has no category helper; fetch the raw element pointer directly.
    bool *ptr = nullptr;
    RETURN_IF_NOT_OK(GetItemPtr<bool>(&ptr, index));
    *o = static_cast<T>(*ptr);
  } else {
    std::string err = "Tensor Type is unknown";
    RETURN_STATUS_UNEXPECTED(err);
  }
  return Status::OK();
}
907 
GetItemAt(std::string_view * o,const std::vector<dsize_t> & index) const908 Status Tensor::GetItemAt(std::string_view *o, const std::vector<dsize_t> &index) const {
909   RETURN_UNEXPECTED_IF_NULL(data_);
910   RETURN_UNEXPECTED_IF_NULL(o);
911   CHECK_FAIL_RETURN_UNEXPECTED(type_.IsString(), "Tensor type is not of string or bytes.");
912 
913   uchar *start = nullptr;
914   offset_t length = 0;
915   RETURN_IF_NOT_OK(GetItemPtr(&start, index, &length));
916   std::string_view sv{reinterpret_cast<const char *>(start), length};
917   o->swap(sv);
918   return Status::OK();
919 }
920 
921 #ifdef ENABLE_PYTHON
922 // return data as numpy, should return status
// Convert this tensor to a NumPy array of the matching dtype. Numeric types
// construct a py::array_t from the raw buffer (pybind copies the data);
// string/bytes tensors go through GetDataAsNumpyStrings.
Status Tensor::GetDataAsNumpy(py::array *data) {
  RETURN_UNEXPECTED_IF_NULL(data);
  if (type_ == DataType::DE_BOOL) {
    *data = py::array_t<bool>(shape_.AsVector(), reinterpret_cast<bool *>(data_));
  } else if (type_ == DataType::DE_INT8) {
    *data = py::array_t<int8_t>(shape_.AsVector(), reinterpret_cast<int8_t *>(data_));
  } else if (type_ == DataType::DE_INT16) {
    *data = py::array_t<int16_t>(shape_.AsVector(), reinterpret_cast<int16_t *>(data_));
  } else if (type_ == DataType::DE_INT32) {
    *data = py::array_t<int32_t>(shape_.AsVector(), reinterpret_cast<int32_t *>(data_));
  } else if (type_ == DataType::DE_INT64) {
    *data = py::array_t<int64_t>(shape_.AsVector(), reinterpret_cast<int64_t *>(data_));
  } else if (type_ == DataType::DE_UINT8) {
    *data = py::array_t<uint8_t>(shape_.AsVector(), reinterpret_cast<uint8_t *>(data_));
  } else if (type_ == DataType::DE_UINT16) {
    *data = py::array_t<uint16_t>(shape_.AsVector(), reinterpret_cast<uint16_t *>(data_));
  } else if (type_ == DataType::DE_UINT32) {
    *data = py::array_t<uint32_t>(shape_.AsVector(), reinterpret_cast<uint32_t *>(data_));
  } else if (type_ == DataType::DE_UINT64) {
    *data = py::array_t<uint64_t>(shape_.AsVector(), reinterpret_cast<uint64_t *>(data_));
  } else if (type_ == DataType::DE_FLOAT16) {
    // float16 relies on the pybind type caster registered in pybind_support.h.
    *data = py::array_t<float16>(shape_.AsVector(), reinterpret_cast<float16 *>(data_));
  } else if (type_ == DataType::DE_FLOAT32) {
    *data = py::array_t<float>(shape_.AsVector(), reinterpret_cast<float *>(data_));
  } else if (type_ == DataType::DE_FLOAT64) {
    *data = py::array_t<double>(shape_.AsVector(), reinterpret_cast<double *>(data_));
  } else if (type_.IsString()) {
    RETURN_IF_NOT_OK(GetDataAsNumpyStrings(data));
  } else {
    RETURN_STATUS_UNEXPECTED("Got unexpected type when returning numpy");
  }
  return Status::OK();
}
956 
GetDataAsNumpyStrings(py::array * data)957 Status Tensor::GetDataAsNumpyStrings(py::array *data) {
958   RETURN_UNEXPECTED_IF_NULL(data);
959   if (type_ == DataType::DE_STRING) {
960     RETURN_IF_NOT_OK(GetDataAsNumpyStrings<py::str>(data));
961   } else if (type_ == DataType::DE_BYTES) {
962     RETURN_IF_NOT_OK(GetDataAsNumpyStrings<py::bytes>(data));
963   } else {
964     RETURN_STATUS_UNEXPECTED("Can not convert a numeric Tensor to a string NumPy array.");
965   }
966   return Status::OK();
967 }
968 
GetDataAsPythonObject(py::dict * data)969 Status Tensor::GetDataAsPythonObject(py::dict *data) {
970   RETURN_UNEXPECTED_IF_NULL(data);
971   {
972     py::gil_scoped_acquire gil_acquire;
973     *data = python_dict_;
974   }
975   return Status::OK();
976 }
977 #endif
978 
Squeeze()979 void Tensor::Squeeze() { shape_ = shape_.Squeeze(); }
980 
// Read an unsigned-integer element at `index` into *o, switching on the stored
// width (uint8..uint64) and converting to T with static_cast.
template <typename T>
Status Tensor::GetUnsignedIntAt(T *o, const std::vector<dsize_t> &index) const {
  RETURN_UNEXPECTED_IF_NULL(o);
  if (data_ == nullptr) {
    RETURN_STATUS_UNEXPECTED("Data is not allocated yet");
  }
  if (!type_.IsLooselyCompatible<T>()) {
    std::string err = "Template type and Tensor type are not compatible";
    RETURN_STATUS_UNEXPECTED(err);
  }
  switch (type_.value()) {
    case DataType::DE_UINT8: {
      uint8_t *ptr = nullptr;
      RETURN_IF_NOT_OK(GetItemPtr<uint8_t>(&ptr, index));
      *o = static_cast<T>(*ptr);
      break;
    }
    case DataType::DE_UINT16: {
      uint16_t *ptr = nullptr;
      RETURN_IF_NOT_OK(GetItemPtr<uint16_t>(&ptr, index));
      *o = static_cast<T>(*ptr);
      break;
    }
    case DataType::DE_UINT32: {
      uint32_t *ptr = nullptr;
      RETURN_IF_NOT_OK(GetItemPtr<uint32_t>(&ptr, index));
      *o = static_cast<T>(*ptr);
      break;
    }
    case DataType::DE_UINT64: {
      uint64_t *ptr = nullptr;
      RETURN_IF_NOT_OK(GetItemPtr<uint64_t>(&ptr, index));
      *o = static_cast<T>(*ptr);
      break;
    }
    default:
      std::string err = "Tensor Type is not an unsigned Integer";
      RETURN_STATUS_UNEXPECTED(err);
  }
  return Status::OK();
}
1022 
// Read a signed-integer element at `index` into *o, switching on the stored
// width (int8..int64) and converting to T with static_cast.
template <typename T>
Status Tensor::GetSignedIntAt(T *o, const std::vector<dsize_t> &index) const {
  RETURN_UNEXPECTED_IF_NULL(o);
  if (data_ == nullptr) {
    RETURN_STATUS_UNEXPECTED("Data is not allocated yet");
  }
  if (!type_.IsLooselyCompatible<T>()) {
    std::string err = "Template type and Tensor type are not compatible";
    RETURN_STATUS_UNEXPECTED(err);
  }
  switch (type_.value()) {
    case DataType::DE_INT8: {
      int8_t *ptr = nullptr;
      RETURN_IF_NOT_OK(GetItemPtr<int8_t>(&ptr, index));
      *o = static_cast<T>(*ptr);
      break;
    }
    case DataType::DE_INT16: {
      int16_t *ptr = nullptr;
      RETURN_IF_NOT_OK(GetItemPtr<int16_t>(&ptr, index));
      *o = static_cast<T>(*ptr);
      break;
    }
    case DataType::DE_INT32: {
      int32_t *ptr = nullptr;
      RETURN_IF_NOT_OK(GetItemPtr<int32_t>(&ptr, index));
      *o = static_cast<T>(*ptr);
      break;
    }
    case DataType::DE_INT64: {
      int64_t *ptr = nullptr;
      RETURN_IF_NOT_OK(GetItemPtr<int64_t>(&ptr, index));
      *o = static_cast<T>(*ptr);
      break;
    }
    default:
      std::string err = "Tensor Type is not a signed Integer";
      RETURN_STATUS_UNEXPECTED(err);
  }
  return Status::OK();
}
1064 
// Read a floating-point element at `index` into *o, switching on the stored
// precision (float16/float32/float64) and converting to T with static_cast.
template <typename T>
Status Tensor::GetFloatAt(T *o, const std::vector<dsize_t> &index) const {
  RETURN_UNEXPECTED_IF_NULL(o);
  if (data_ == nullptr) {
    RETURN_STATUS_UNEXPECTED("Data is not allocated yet");
  }
  if (!type_.IsLooselyCompatible<T>()) {
    std::string err = "Template type and Tensor type are not compatible";
    RETURN_STATUS_UNEXPECTED(err);
  }
  switch (type_.value()) {
    case DataType::DE_FLOAT16: {
      float16 *ptr = nullptr;
      RETURN_IF_NOT_OK(GetItemPtr<float16>(&ptr, index));
      *o = static_cast<T>(*ptr);
      break;
    }
    case DataType::DE_FLOAT32: {
      float *ptr = nullptr;
      RETURN_IF_NOT_OK(GetItemPtr<float>(&ptr, index));
      *o = static_cast<T>(*ptr);
      break;
    }
    case DataType::DE_FLOAT64: {
      double *ptr = nullptr;
      RETURN_IF_NOT_OK(GetItemPtr<double>(&ptr, index));
      *o = static_cast<T>(*ptr);
      break;
    }
    default:
      std::string err = "Tensor Type is not a float/double";
      RETURN_STATUS_UNEXPECTED(err);
  }
  return Status::OK();
}
1100 
GetStringAt(dsize_t index,uchar ** string_start,offset_t * length) const1101 Status Tensor::GetStringAt(dsize_t index, uchar **string_start, offset_t *length) const {
1102   CHECK_FAIL_RETURN_UNEXPECTED(type_.IsString(), "Type is not string or bytes.");
1103   RETURN_UNEXPECTED_IF_NULL(data_);
1104   RETURN_UNEXPECTED_IF_NULL(string_start);
1105   RETURN_UNEXPECTED_IF_NULL(length);
1106   auto *offset_ptr = reinterpret_cast<offset_t *>(data_);  // offsets starts here
1107   offset_t start = offset_ptr[index];
1108   *string_start = data_ + start;
1109   *length = offset_ptr[index + 1] - start - 1;  // -1 to skip the \0 from the string length
1110   return Status::OK();
1111 }
1112 
// Copy one row along the last dimension from `src` into this tensor at `index`.
// `index` must end in 0 (it addresses the start of a last-dim row) and is
// interpreted in both tensors' shapes; the copy length is the smaller of the
// two last-dimension extents, in bytes.
Status Tensor::CopyLastDimAt(const std::shared_ptr<Tensor> &src, const std::vector<dsize_t> &index) {
  RETURN_UNEXPECTED_IF_NULL(src);
  CHECK_FAIL_RETURN_UNEXPECTED(src->type() == type_, "Source Tensor has a different type");
  CHECK_FAIL_RETURN_UNEXPECTED(index.back() == 0, "Last dim in index should be 0");

  uint8_t type_size = type_.SizeInBytes();
  // Clamp to the shorter of the two rows so neither buffer is over-read/written.
  size_t len = std::min(src->shape()[-1], shape_[-1]) * type_size;
  dsize_t src_flat_ind = 0, dst_flat_ind = 0;
  RETURN_IF_NOT_OK(src->shape().ToFlatIndex(index, &src_flat_ind));
  RETURN_IF_NOT_OK(shape_.ToFlatIndex(index, &dst_flat_ind));

  const unsigned char *src_addr = src->GetBuffer() + src_flat_ind * type_size;
  unsigned char *dst_addr = GetMutableBuffer() + dst_flat_ind * type_size;
  CHECK_FAIL_RETURN_UNEXPECTED(memcpy_s(dst_addr, len, src_addr, len) == EOK, "memcpy error");
  return Status::OK();
}
1129 
GetSliceOption(const SliceOption & slice_option,const int32_t & slice_index,SliceOption * slice_option_ptr)1130 Status Tensor::GetSliceOption(const SliceOption &slice_option, const int32_t &slice_index,
1131                               SliceOption *slice_option_ptr) {
1132   RETURN_UNEXPECTED_IF_NULL(slice_option_ptr);
1133   if (slice_option.indices_.empty() && !slice_option.slice_.valid()) {
1134     RETURN_STATUS_UNEXPECTED("Both indices and slices can not be empty.");
1135   }
1136 
1137   if (!slice_option.indices_.empty() && slice_option.slice_.valid()) {
1138     RETURN_STATUS_UNEXPECTED("Both indices and slices can not be given.");
1139   }
1140 
1141   CHECK_FAIL_RETURN_UNEXPECTED(shape_.Size() > slice_index, "Invalid shape, should be greater than slices index.");
1142   // if slice object was provided, indices should be empty. Generate indices from the slice object.
1143   if (slice_option.indices_.empty()) {
1144     // check if slice is valid
1145     mindspore::dataset::Slice slice_copy = slice_option.slice_;
1146     slice_copy.start_ = HandleNeg(slice_option.slice_.start_, shape_[slice_index]);
1147     slice_copy.stop_ = HandleNeg(slice_option.slice_.stop_, shape_[slice_index]);
1148     slice_copy.start_ = slice_copy.start_ < 0 ? 0 : slice_copy.start_;
1149     slice_copy.stop_ = slice_copy.stop_ < 0 ? 0 : slice_copy.stop_;
1150     dsize_t max_idx = shape_[slice_index];
1151     slice_copy.start_ = slice_copy.start_ > max_idx ? max_idx : slice_copy.start_;
1152     slice_copy.stop_ = slice_copy.stop_ > max_idx ? max_idx : slice_copy.stop_;
1153     *slice_option_ptr = SliceOption(slice_copy);
1154   } else {
1155     // indices validation
1156     std::vector<dsize_t> indices_copy;
1157     for (int j = 0; j < slice_option.indices_.size(); j++) {
1158       dsize_t index = HandleNeg(slice_option.indices_[j], shape_[slice_index]);
1159       CHECK_FAIL_RETURN_UNEXPECTED(index < shape_[slice_index] && index >= 0,
1160                                    "Index " + std::to_string(index) + " is out of bounds.");
1161       indices_copy.emplace_back(index);
1162     }
1163     *slice_option_ptr = SliceOption(indices_copy);
1164   }
1165   return Status::OK();
1166 }
1167 
// NumPy-style slicing entry point. Normalizes every SliceOption, pads missing
// trailing dimensions with full-range slices, computes the output shape, then
// materializes the result via SliceNumeric or SliceString.
Status Tensor::Slice(std::shared_ptr<Tensor> *out, const std::vector<SliceOption> &slice_options) {
  RETURN_UNEXPECTED_IF_NULL(out);
  std::vector<SliceOption> converted_slice_objects;

  CHECK_FAIL_RETURN_UNEXPECTED(slice_options.size() <= static_cast<size_t>(std::numeric_limits<dsize_t>::max()),
                               "The size of slice_options_ must not be more than \"INT64_MAX\".");
  for (size_t k = 0; k < slice_options.size(); k++) {
    SliceOption slice_option = slice_options[k];

    // "all_" means take the entire dimension: replace with a full-range slice.
    if (slice_option.all_) {
      auto slice = mindspore::dataset::Slice(shape_[static_cast<dsize_t>(k)]);
      converted_slice_objects.emplace_back(slice);
      continue;
    }

    CHECK_FAIL_RETURN_UNEXPECTED(k <= static_cast<size_t>(std::numeric_limits<int32_t>::max()),
                                 "GetSliceOption() can't function properly if there are "
                                 "more than \"INT32_MAX\" slice options");
    SliceOption slice_option_item(false);
    RETURN_IF_NOT_OK(GetSliceOption(slice_option, static_cast<int32_t>(k), &slice_option_item));
    converted_slice_objects.emplace_back(slice_option_item);
  }

  // partial slices, pass in the rest
  if (slice_options.size() != Rank()) {
    for (auto j = static_cast<dsize_t>(slice_options.size()); j < Rank(); j++) {
      mindspore::dataset::Slice slice = mindspore::dataset::Slice(0, shape_[j]);
      converted_slice_objects.emplace_back(SliceOption(slice));
    }
  }

  // determine final shape:
  TensorShape t = TensorShape({});
  dsize_t slice_len = slice_options.size();
  dsize_t slice_len_ind;
  // NOTE(review): `int i` vs dsize_t Rank() is a signed-width mix; safe while
  // Rank() fits in int, which holds for realistic tensor ranks.
  for (int i = 0; i < shape_.Rank(); i++) {
    if (i < slice_len) {
      // if it's a slice
      if (converted_slice_objects[i].indices_.empty() && converted_slice_objects[i].slice_.step_ != 0) {
        // ceil((stop - start) / step): round up when the range isn't an exact
        // multiple of the step.
        slice_len_ind = (converted_slice_objects[i].slice_.stop_ - converted_slice_objects[i].slice_.start_) /
                        converted_slice_objects[i].slice_.step_;
        if ((converted_slice_objects[i].slice_.stop_ - converted_slice_objects[i].slice_.start_) %
              converted_slice_objects[i].slice_.step_ !=
            0) {
          slice_len_ind++;
        }
        // account for slices that would return no data
        slice_len_ind = slice_len_ind < 0 ? 0 : slice_len_ind;
        t = t.AppendDim(slice_len_ind);
      } else {
        // if its a vector of indices
        // need to introduce a way of handling indices and slices
        if (!converted_slice_objects[i].indices_.empty()) {
          t = t.AppendDim(converted_slice_objects[i].indices_.size());
        }
      }
    } else {
      // add in the rest of the dimensions
      slice_len_ind = shape_[i];
      t = t.AppendDim(slice_len_ind);
    }
  }

  std::vector<std::vector<dsize_t>> indices_vector = IndexGenerator(converted_slice_objects);

  // An empty index set means the slice selects nothing: return an empty tensor.
  if (indices_vector.empty()) {
    return CreateEmpty(t, type_, out);
  }
  if (type_.IsNumeric()) {
    return SliceNumeric(out, indices_vector, t);
  } else {
    return SliceString(out, indices_vector, t);
  }
}
1242 
// Materialize a numeric slice: copy the selected elements into a freshly
// allocated tensor of shape `shape`. Consecutive source indices (one stride
// apart) are coalesced into a single memcpy_s for speed.
Status Tensor::SliceNumeric(std::shared_ptr<Tensor> *out, const std::vector<std::vector<dsize_t>> &indices,
                            const TensorShape &shape) {
  RETURN_UNEXPECTED_IF_NULL(out);
  RETURN_IF_NOT_OK(CreateEmpty(shape, type_, out));

  RETURN_UNEXPECTED_IF_NULL(out);
  (*out)->GetMutableBuffer();
  dsize_t out_index = 0;
  std::vector<dsize_t> dim_length = shape_.AsVector();
  dsize_t type_size = type_.SizeInBytes();
  // Resolve any negative components of the first index before flattening it.
  std::vector<dsize_t> src_start = HandleNegIndices(indices[0], dim_length);
  dsize_t src_start_index;
  RETURN_IF_NOT_OK(shape_.ToFlatIndex(src_start, &src_start_index));

  uchar *dst_addr = (*out)->data_;
  dsize_t count = 1;  // number of contiguous runs accumulated so far

  // to handle partial slices: each index addresses a block of `current_stride`
  // elements (the stride of the last indexed dimension).
  dsize_t current_stride = shape_.Strides()[indices[0].size() - 1];
  auto indices_size = static_cast<dsize_t>(indices.size());
  for (dsize_t i = 0; i < indices_size; i++) {
    std::vector<dsize_t> cur_index = HandleNegIndices(indices[i], dim_length);
    if (i < indices_size - 1) {
      std::vector<dsize_t> next_index = HandleNegIndices(indices[i + 1], dim_length);
      dsize_t flat_idx_curr;
      dsize_t flat_idx_next;

      RETURN_IF_NOT_OK(shape_.ToFlatIndex(cur_index, &flat_idx_curr));
      RETURN_IF_NOT_OK(shape_.ToFlatIndex(next_index, &flat_idx_next));

      // If the next block starts right after this one, extend the current run
      // instead of issuing a copy now.
      if (flat_idx_next == flat_idx_curr + current_stride) {
        count++;
        continue;
      }
    }

    // Flush the accumulated contiguous run with one memcpy_s.
    int return_code = memcpy_s(dst_addr + out_index * type_size, (*out)->SizeInBytes(),
                               data_ + src_start_index * type_size, count * type_size * current_stride);
    CHECK_FAIL_RETURN_UNEXPECTED(return_code == EOK, "memcpy_s failed in SliceNumeric");
    out_index += count * current_stride;
    if (i < indices_size - 1) {
      src_start = HandleNegIndices(indices[i + 1], dim_length);  // next index
      RETURN_IF_NOT_OK(shape_.ToFlatIndex(src_start, &src_start_index));
    }
    count = 1;
  }
  return Status::OK();
}
1291 
// Materialize a string/bytes slice: gather the selected elements into a
// std::string vector and build the output tensor from it (preserving the
// original DE_STRING/DE_BYTES type).
Status Tensor::SliceString(std::shared_ptr<Tensor> *out, const std::vector<std::vector<dsize_t>> &indices,
                           const TensorShape &shape) {
  RETURN_UNEXPECTED_IF_NULL(out);
  std::vector<dsize_t> dim_length = shape_.AsVector();
  std::vector<std::string> strings;

  for (const std::vector<dsize_t> &index : indices) {
    std::vector<dsize_t> cur_index = HandleNegIndices(index, dim_length);
    dsize_t cur_flat_index;
    // NOTE(review): cur_flat_index is never read afterwards — the call presumably
    // doubles as an index-validity check via the returned Status; confirm before
    // removing.
    RETURN_IF_NOT_OK(shape_.ToFlatIndex(cur_index, &cur_flat_index));
    std::string_view sv;
    RETURN_IF_NOT_OK(GetItemAt(&sv, {cur_index}));
    strings.emplace_back(sv);
  }
  return CreateFromVector(strings, shape, type_, out);
}
1308 
CreateFromMSTensor(const MSTensor & in,TensorPtr * out)1309 Status Tensor::CreateFromMSTensor(const MSTensor &in, TensorPtr *out) {
1310   if (in.Data() == nullptr) {
1311     *out = nullptr;
1312     return Status::OK();
1313   }
1314   return Tensor::CreateFromMemory(TensorShape(in.Shape()), MSTypeToDEType(static_cast<TypeId>(in.DataType())),
1315                                   (const uchar *)(in.Data().get()), in.DataSize(), out);
1316 }
1317 }  // namespace dataset
1318 }  // namespace mindspore
1319