• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2019 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "minddata/dataset/core/tensor.h"
17 
18 #include <algorithm>
19 #include <iomanip>
20 #include <iostream>
21 #include <fstream>
22 #include <functional>
23 #include <limits>
24 #include <memory>
25 #include <vector>
26 #include <utility>
27 
28 #include "minddata/dataset/core/type_id.h"
29 
30 #include "utils/ms_utils.h"
31 #include "minddata/dataset/include/dataset/constants.h"
32 
33 #ifndef ENABLE_ANDROID
34 #include "minddata/dataset/core/cv_tensor.h"
35 #endif
36 
37 #include "minddata/dataset/core/global_context.h"
38 
39 #ifdef ENABLE_PYTHON
40 #include "minddata/dataset/core/pybind_support.h"
41 namespace py = pybind11;
42 #endif
43 
44 #include "minddata/dataset/core/tensor_shape.h"
45 
46 namespace mindspore {
47 namespace dataset {
48 // Helper macros for printing tensor elements
49 #define CASE_PRINT(de_type, native_type)    \
50   case de_type: {                           \
51     native_type o;                          \
52     rc = GetItemAt<native_type>(&o, index); \
53     out << o;                               \
54     break;                                  \
55   }
56 
57 #define CASE_PRINT_HEX(de_type, native_type)                                                    \
58   case de_type: {                                                                               \
59     native_type o;                                                                              \
60     rc = GetItemAt<native_type>(&o, index);                                                     \
61     out << std::hex << std::setw(2) << std::setfill('0') << o << std::dec << std::setfill(' '); \
62     break;                                                                                      \
63   }
64 
Tensor(const TensorShape & shape,const DataType & type)65 Tensor::Tensor(const TensorShape &shape, const DataType &type) : shape_(shape), type_(type), data_(nullptr) {
66   // grab the mem pool from global context and create the allocator for char data area
67   std::shared_ptr<MemoryPool> global_pool = GlobalContext::Instance()->mem_pool();
68   data_allocator_ = std::make_unique<Allocator<unsigned char>>(global_pool);
69 }
70 
// Move constructor: takes ownership of other's shape, type, buffer and
// allocator, then invalidates `other` so its destructor will not free the
// transferred buffer.
Tensor::Tensor(Tensor &&other) noexcept
    : shape_(other.shape()),
      type_(other.type()),
      data_(other.GetMutableBuffer()),
      data_end_(other.data_end_),
      data_allocator_(std::move(other.data_allocator_)) {
  other.Invalidate();
}
79 
operator =(Tensor && other)80 Tensor &Tensor::operator=(Tensor &&other) noexcept {
81   if (&other != this) {
82     shape_ = other.shape();
83     type_ = other.type();
84     data_ = other.GetMutableBuffer();
85     data_end_ = other.data_end_;
86     data_allocator_ = std::move(other.data_allocator_);
87     other.Invalidate();
88   }
89   return *this;
90 }
CreateEmpty(const TensorShape & shape,const DataType & type,TensorPtr * out)91 Status Tensor::CreateEmpty(const TensorShape &shape, const DataType &type, TensorPtr *out) {
92   CHECK_FAIL_RETURN_UNEXPECTED(shape.known(), "Invalid shape.");
93   CHECK_FAIL_RETURN_UNEXPECTED(type != DataType::DE_UNKNOWN, "Invalid data type.");
94   RETURN_UNEXPECTED_IF_NULL(out);
95   const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
96   *out = std::allocate_shared<Tensor>(*alloc, shape, type);
97   CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory failed.");
98   // if it's a string tensor and it has no elements, Just initialize the shape and type.
99   if (!type.IsNumeric() && shape.NumOfElements() == 0) {
100     return Status::OK();
101   }
102 
103   CHECK_FAIL_RETURN_UNEXPECTED(type.IsNumeric(), "Number of elements is not 0. The type should be numeric.");
104 
105   int64_t byte_size = (*out)->SizeInBytes();
106 
107   // Don't allocate if we have a tensor with no elements.
108   if (byte_size != 0) {
109     RETURN_IF_NOT_OK((*out)->AllocateBuffer(byte_size));
110   }
111   return Status::OK();
112 }
CreateFromMemory(const TensorShape & shape,const DataType & type,const uchar * src,TensorPtr * out)113 Status Tensor::CreateFromMemory(const TensorShape &shape, const DataType &type, const uchar *src, TensorPtr *out) {
114   RETURN_IF_NOT_OK(CreateEmpty(shape, type, out));
115   if (src != nullptr && out != nullptr) {
116     // Given the shape/type of this tensor, compute the data size and copy in the input bytes.
117     int64_t byte_size = (*out)->SizeInBytes();
118     if (byte_size == 0) {
119       return Status::OK();
120     }
121     if (byte_size < SECUREC_MEM_MAX_LEN) {
122       int ret_code = memcpy_s((*out)->data_, byte_size, src, byte_size);
123       CHECK_FAIL_RETURN_UNEXPECTED(ret_code == 0, "Failed to copy data into tensor.");
124     } else {
125       auto ret_code = std::memcpy((*out)->data_, src, byte_size);
126       CHECK_FAIL_RETURN_UNEXPECTED(ret_code == (*out)->data_, "Failed to copy data into tensor.");
127     }
128   }
129   return Status::OK();
130 }
131 
CreateFromMemory(const TensorShape & shape,const DataType & type,const unsigned char * src,const dsize_t & length,TensorPtr * out)132 Status Tensor::CreateFromMemory(const TensorShape &shape, const DataType &type, const unsigned char *src,
133                                 const dsize_t &length, TensorPtr *out) {
134   RETURN_UNEXPECTED_IF_NULL(src);
135   RETURN_UNEXPECTED_IF_NULL(out);
136   const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
137   *out = std::allocate_shared<Tensor>(*alloc, shape, type);
138   CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory failed.");
139   if (type.IsNumeric()) {
140     dsize_t calculated_length = (*out)->SizeInBytes();
141     CHECK_FAIL_RETURN_UNEXPECTED(calculated_length == length, "Length of source data does not match the shape.");
142   } else {
143     // min_length is the length of a tensor with empty strings
144     // min_length = the number of bytes needed to store the offsets + 1 byte for each element
145     dsize_t min_length = (shape.NumOfElements() + 1) * kOffsetSize + shape.NumOfElements();
146     CHECK_FAIL_RETURN_UNEXPECTED(min_length <= length, "Length of source data does not match the shape.");
147   }
148 
149   RETURN_IF_NOT_OK((*out)->AllocateBuffer(length));
150   if (length == 0) {
151     return Status::OK();
152   }
153   if (length < SECUREC_MEM_MAX_LEN) {
154     int ret_code = memcpy_s((*out)->data_, length, src, length);
155     CHECK_FAIL_RETURN_UNEXPECTED(ret_code == 0, "Failed to copy data into tensor.");
156   } else {
157     auto ret_code = std::memcpy((*out)->data_, src, length);
158     CHECK_FAIL_RETURN_UNEXPECTED(ret_code == (*out)->data_, "Failed to copy data into tensor.");
159   }
160 
161   return Status::OK();
162 }
163 
164 #ifdef ENABLE_PYTHON
// Build a string tensor from a numpy array of unicode ('U' kind) or byte
// strings. The array is temporarily flattened so it can be iterated once, then
// restored to its original shape before returning.
Status Tensor::CreateFromNpString(py::array arr, std::shared_ptr<Tensor> *out) {
  RETURN_UNEXPECTED_IF_NULL(out);
  std::vector<dsize_t> shape;
  for (dsize_t i = 0; i < arr.ndim(); i++) {
    shape.push_back(static_cast<dsize_t>(arr.shape()[i]));
  }
  arr.resize({arr.size()});  // flatten the py::array so we can iterate once
  std::vector<std::string> strings;

  // dtype kind 'U' is numpy unicode; anything else here is treated as raw bytes.
  if (arr.dtype().kind() == 'U') {
    (void)std::for_each(arr.begin(), arr.end(),
                        [&strings](const auto &s) { strings.emplace_back(py::cast<py::str>(s)); });
  } else {
    (void)std::for_each(arr.begin(), arr.end(),
                        [&strings](const auto &s) { strings.emplace_back(py::cast<py::bytes>(s)); });
  }

  arr.resize(shape);  // resize arr back to the original shape

  return CreateFromVector(strings, TensorShape{shape}, out);
}
186 
// Create a tensor from a numpy array. String arrays are delegated to
// CreateFromNpString; numeric arrays are either memcpy'd wholesale (when the
// buffer is C-contiguous) or gathered element by element (when strided).
Status Tensor::CreateFromNpArray(const py::array &arr, std::shared_ptr<Tensor> *out) {
  RETURN_UNEXPECTED_IF_NULL(out);
  if (DataType::FromNpArray(arr) == DataType::DE_STRING) {
    return CreateFromNpString(arr, out);
  }

  std::vector<dsize_t> shape;
  std::vector<dsize_t> strides;
  // check if strides are contiguous
  bool is_strided = false;
  dsize_t count = arr.size();
  for (dsize_t i = 0; i < arr.ndim(); i++) {
    shape.push_back(static_cast<dsize_t>(arr.shape()[i]));
    strides.push_back(static_cast<dsize_t>(arr.strides()[i]));
    // in case of empty array num_items=0
    // After dividing out dimension i, `count` is the element count of the
    // remaining dimensions — i.e. the stride (in elements) dimension i would
    // have if the array were C-contiguous; a mismatch with the actual numpy
    // byte stride marks the array as strided.
    if (count != 0 && shape.size() > i && shape[i] != 0) {
      count /= shape[i];
      if (strides[i] != arr.itemsize() * count) {
        is_strided = true;
      }
    }
  }

  unsigned char *data = static_cast<unsigned char *>(arr.request().ptr);

  if (is_strided) {
    RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape(shape), DataType::FromNpArray(arr), out));
    RETURN_IF_NOT_OK(CopyStridedArray((*out)->data_, data, shape, strides, (*out)->type_.SizeInBytes()));
  } else {
    RETURN_IF_NOT_OK(Tensor::CreateFromMemory(TensorShape(shape), DataType::FromNpArray(arr), data, out));
  }
  return Status::OK();
}
220 #endif
221 
222 #ifndef ENABLE_ANDROID
CreateFromByteList(const dataengine::BytesList & bytes_list,const TensorShape & shape,TensorPtr * out)223 Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const TensorShape &shape, TensorPtr *out) {
224   RETURN_UNEXPECTED_IF_NULL(out);
225   const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
226   *out = std::allocate_shared<Tensor>(*alloc, TensorShape({static_cast<dsize_t>(bytes_list.value_size())}),
227                                       DataType(DataType::DE_STRING));
228   CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory failed.");
229   // total bytes needed = offset array + strings
230   // offset array needs to store one offset var per element + 1 extra to get the length of the last string.
231   // strings will be null-terminated --> need 1 extra byte per element
232   dsize_t num_bytes = (kOffsetSize) * (*out)->shape_.NumOfElements() + kOffsetSize + bytes_list.ByteSizeLong();
233 
234   (*out)->data_ = (*out)->data_allocator_->allocate(num_bytes);
235 
236   auto offset_arr = reinterpret_cast<offset_t *>((*out)->data_);
237   uchar *buf = (*out)->GetStringsBuffer();
238 
239   offset_t offset = buf - (*out)->data_;  // the first string will start here
240   int32_t i = 0;
241   for (; i < bytes_list.value_size(); i++) {
242     const std::string &str = bytes_list.value(i);
243     //  insert the start index of the string.
244     offset_arr[i] = offset;
245     // total bytes are reduced by kOffsetSize
246     num_bytes -= kOffsetSize;
247     // insert actual string
248     int ret_code = memcpy_s((*out)->data_ + offset, num_bytes, common::SafeCStr(str), str.length() + 1);
249     CHECK_FAIL_RETURN_UNEXPECTED(ret_code == 0, "Cannot copy string into Tensor");
250     //  next string will be stored right after the current one.
251     offset = offset + str.length() + 1;
252     // total bytes are reduced by the length of the string
253     num_bytes -= str.length() + 1;
254   }
255   // store one more offset value so we can get the length of the last string
256   // length[last_element] = offset_arr[last_element + 1] - offset_arr[last_element]
257   offset_arr[i] = offset;
258 
259   (*out)->data_end_ = (*out)->data_ + offset_arr[i];
260 
261   MS_ASSERT(num_bytes == 0);
262   RETURN_IF_NOT_OK((*out)->Reshape(shape));
263   return Status::OK();
264 }
265 #endif
266 
CreateFromFile(const std::string & path,std::shared_ptr<Tensor> * out)267 Status Tensor::CreateFromFile(const std::string &path, std::shared_ptr<Tensor> *out) {
268   RETURN_UNEXPECTED_IF_NULL(out);
269   Path file(path);
270   if (file.IsDirectory()) {
271     RETURN_STATUS_UNEXPECTED("Invalid file found: " + path + ", should be file, but got directory.");
272   }
273   std::ifstream fs;
274   fs.open(path, std::ios::binary | std::ios::in);
275   CHECK_FAIL_RETURN_UNEXPECTED(!fs.fail(), "Failed to open file: " + path);
276   int64_t num_bytes = fs.seekg(0, std::ios::end).tellg();
277   CHECK_FAIL_RETURN_UNEXPECTED(num_bytes < kDeMaxDim, "Invalid file to allocate tensor memory, check path: " + path);
278   CHECK_FAIL_RETURN_UNEXPECTED(fs.seekg(0, std::ios::beg).good(), "Failed to find size of file, check path: " + path);
279   RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape{num_bytes}, DataType(DataType::DE_UINT8), out));
280   int64_t written_bytes = fs.read(reinterpret_cast<char *>((*out)->GetMutableBuffer()), num_bytes).gcount();
281   if (!(written_bytes == num_bytes && fs.good())) {
282     fs.close();
283     RETURN_STATUS_UNEXPECTED("Error in writing to tensor, check path: " + path);
284   }
285   fs.close();
286   return Status::OK();
287 }
288 
289 #ifndef ENABLE_ANDROID
CreateFromByteList(const dataengine::BytesList & bytes_list,const TensorShape & shape,const DataType & type,dsize_t pad_size,TensorPtr * out)290 Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const TensorShape &shape,
291                                   const DataType &type, dsize_t pad_size, TensorPtr *out) {
292   RETURN_UNEXPECTED_IF_NULL(out);
293   RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, type, out));
294 
295   RETURN_UNEXPECTED_IF_NULL(out);
296   unsigned char *current_tensor_addr = (*out)->GetMutableBuffer();
297   int64_t tensor_bytes_remaining = bytes_list.value_size() * pad_size;
298 
299   for (int i = 0; i < bytes_list.value_size(); i++) {
300     // read string data into tensor
301     const std::string &current_element = bytes_list.value(i);
302     int return_code =
303       memcpy_s(current_tensor_addr, tensor_bytes_remaining, common::SafeCStr(current_element), current_element.size());
304 
305     CHECK_FAIL_RETURN_UNEXPECTED(return_code == 0, "memcpy_s failed when reading bytesList element into Tensor");
306 
307     current_tensor_addr += current_element.size();
308     tensor_bytes_remaining -= current_element.size();
309 
310     // pad
311     int64_t chars_to_pad = pad_size - current_element.size();
312     return_code = memset_s(current_tensor_addr, tensor_bytes_remaining, static_cast<int>(' '), chars_to_pad);
313     CHECK_FAIL_RETURN_UNEXPECTED(return_code == 0, "memcpy_s failed when padding Tensor");
314 
315     current_tensor_addr += chars_to_pad;
316     tensor_bytes_remaining -= chars_to_pad;
317   }
318 
319   return Status::OK();
320 }
321 #endif
322 
// Memcpy the given strided array's used part to consecutive memory
// Consider a 3-d array
// A[(i * shape[1] + j) * shape[2] + k] = B[i][j][k] = C[i * strides[0] + j * strides[1] + k * strides[2]]
// Here we convert array C to array A, by memcpy index by index (Note that not all elements in C is copied)
//
// dst: destination of the contiguous (C-order) output, element i written at dst + i * type_size.
// src: base of the strided source buffer; strides are in bytes (they already include type_size).
Status Tensor::CopyStridedArray(unsigned char *dst, unsigned char *src, std::vector<dsize_t> shape,
                                std::vector<dsize_t> strides, uint8_t type_size) {
  RETURN_UNEXPECTED_IF_NULL(dst);
  RETURN_UNEXPECTED_IF_NULL(src);
  dsize_t size = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<>());
  for (dsize_t i = 0; i < size; ++i) {
    dsize_t offset = 0;
    dsize_t count = i;
    // Decompose flat index i into per-dimension indices, innermost dimension first.
    for (size_t j = 0; j < shape.size(); ++j) {
      // convert 1d array's index to 3d array's index (A -> B)
      CHECK_FAIL_RETURN_UNEXPECTED(shape[shape.size() - 1 - j] != 0, "Invalid data, shape can't be zero.");
      dsize_t idx = count % shape[shape.size() - 1 - j];
      count /= shape[shape.size() - 1 - j];
      // calculate the raw data offset based on strides (B -> C)
      offset += idx * strides[shape.size() - 1 - j];
      // once count = 0, the following idxes are all zero, skip them
      if (count == 0) {
        break;
      }
    }
    // strides already consider byte size of the data type, but dst doesn't.
    // dst[i] = dst + i * type_size = src + offset
    int ret_code = memcpy_s(dst + i * type_size, type_size, src + offset, type_size);
    if (ret_code != 0) {
      RETURN_STATUS_UNEXPECTED("Failed to copy data into Tensor.");
    }
  }
  return Status::OK();
}
356 
357 // Name: Destructor
358 // Description: Destructor
~Tensor()359 Tensor::~Tensor() {
360   if (data_ != nullptr) {
361     if (data_allocator_ != nullptr) {
362       data_allocator_->deallocate(data_);
363       data_ = nullptr;
364       data_end_ = nullptr;
365     } else {
366       // If we didn't have an allocator, but data_ is not null then it must
367       // be a stand-alone tensor that used malloc directly.
368       free(data_);
369       data_ = nullptr;
370       data_end_ = nullptr;
371     }
372   }
373 }
374 
operator ==(const Tensor & rhs) const375 bool Tensor::operator==(const Tensor &rhs) const {
376   // 1. different shape 2. different type 3. one data_ is nullptr and the other is not
377   if (shape_ != rhs.shape() || type_ != rhs.type_ || (data_ == nullptr && rhs.data_ != nullptr) ||
378       (data_ != nullptr && rhs.data_ == nullptr)) {
379     return false;
380   }
381   if (data_ == nullptr && rhs.data_ == nullptr) {
382     return true;
383   }
384   // use mem compare to compare the two data, size are already verified
385   return memcmp(data_, rhs.data_, SizeInBytes()) == 0;
386 }
387 
// Name: PrintItemAt()
// Description: A function that prints the value as specified by its index.
// Numeric types are fetched via GetItemAt and streamed out; the one-byte types
// (bool/int8/uint8) are printed as zero-padded hex via CASE_PRINT_HEX, strings
// are quoted, and unknown types print "?". Any GetItemAt failure is appended
// to the stream as the status text.
void Tensor::PrintItemAt(const std::vector<dsize_t> &index, std::ostream &out) const {
  Status rc;
  MS_ASSERT(data_);

  switch (type_.value()) {
    CASE_PRINT_HEX(DataType::DE_BOOL, bool)

    CASE_PRINT_HEX(DataType::DE_INT8, int8_t)

    CASE_PRINT_HEX(DataType::DE_UINT8, uint8_t)

    CASE_PRINT(DataType::DE_INT16, int16_t)

    CASE_PRINT(DataType::DE_UINT16, uint16_t)

    CASE_PRINT(DataType::DE_INT32, int32_t)

    CASE_PRINT(DataType::DE_UINT32, uint32_t)

    CASE_PRINT(DataType::DE_INT64, int64_t)

    CASE_PRINT(DataType::DE_UINT64, uint64_t)

    CASE_PRINT(DataType::DE_FLOAT16, float16)

    CASE_PRINT(DataType::DE_FLOAT32, float)

    CASE_PRINT(DataType::DE_FLOAT64, double)

    case DataType::DE_STRING: {
      std::string_view o{""};
      rc = GetItemAt(&o, index);
      out << "\"" << o << "\"";
      break;
    }
    default: {
      out << "?";
      break;
    }
  }
  if (rc.IsError()) {
    out << rc.ToString();
  }
}
434 
435 // Name: PrintRecursive()
436 // Description: A function that prints Tensor recursively, first called by print
PrintRecursive(std::ostream & out,int32_t cur_dim,const std::vector<dsize_t> & cur_index) const437 void Tensor::PrintRecursive(std::ostream &out, int32_t cur_dim, const std::vector<dsize_t> &cur_index) const {
438   if (cur_index.size() == shape_.Rank()) {
439     PrintItemAt(cur_index, out);
440   } else {
441     out << "[";
442     for (dsize_t i = 0; i < shape_[cur_dim]; i++) {
443       std::vector<dsize_t> new_index = cur_index;
444       new_index.push_back(i);
445       PrintRecursive(out, cur_dim + 1, new_index);
446       if (i < shape_[cur_dim] - 1) {
447         out << ",";
448       }
449     }
450     out << "]";
451   }
452 }
453 
454 // Name: Print()
455 // Description: A function that prints info about the tensor
Print(std::ostream & out) const456 void Tensor::Print(std::ostream &out) const {
457   out << "Tensor (shape: ";
458   out << shape_;
459   out << ", Type: " << type_ << ")\n";
460   if (data_) {
461     PrintRecursive(out, 0, std::vector<dsize_t>{});
462   } else {
463     out << "[Data area is null]";
464   }
465 }
466 
PrintData(std::ostream & out) const467 void Tensor::PrintData(std::ostream &out) const {
468   if (data_) {
469     PrintRecursive(out, 0, std::vector<dsize_t>{});
470   }
471 }
472 
AllocateBuffer(const dsize_t & length)473 Status Tensor::AllocateBuffer(const dsize_t &length) {
474   RETURN_UNEXPECTED_IF_NULL(data_allocator_);
475   if (data_ == nullptr) {
476     data_ = data_allocator_->allocate(length);
477     CHECK_FAIL_RETURN_UNEXPECTED(data_ != nullptr, "Failed to allocate memory for tensor.");
478     data_end_ = data_ + length;
479   }
480   return Status::OK();
481 }
482 
Reshape(const TensorShape & shape)483 Status Tensor::Reshape(const TensorShape &shape) {
484   if (shape.NumOfElements() == shape_.NumOfElements()) {
485     shape_ = shape;
486     return Status::OK();
487   } else {
488     std::string err = "Cannot reshape, Number of elements do not match";
489     RETURN_STATUS_UNEXPECTED(err);
490   }
491 }
492 
// Reset the tensor to a resource-free, unknown state. Used after a move so the
// moved-from tensor's destructor does not release the transferred buffer.
void Tensor::Invalidate() {
  shape_ = TensorShape::CreateUnknownRankShape();
  type_ = DataType(DataType::DE_UNKNOWN);
  data_ = nullptr;
  data_end_ = nullptr;
  data_allocator_ = nullptr;
}
500 
501 template <typename T>
GetItemPtr(T ** ptr,const std::vector<dsize_t> & index) const502 Status Tensor::GetItemPtr(T **ptr, const std::vector<dsize_t> &index) const {
503   RETURN_UNEXPECTED_IF_NULL(ptr);
504   if (type_.IsCompatible<T>()) {
505     if (data_ == nullptr) {
506       std::string err = "Data is not allocated yet";
507       RETURN_STATUS_UNEXPECTED(err);
508     }
509     dsize_t flat_idx;
510     RETURN_IF_NOT_OK(shape_.ToFlatIndex(index, &flat_idx));
511     *ptr = reinterpret_cast<T *>(data_ + flat_idx * type_.SizeInBytes());
512     RETURN_UNEXPECTED_IF_NULL(ptr);
513 
514     return Status::OK();
515   } else {
516     std::string err = "data type not compatible";
517     RETURN_STATUS_UNEXPECTED(err);
518   }
519 }
520 
GetItemPtr(uchar ** ptr,const std::vector<dsize_t> & index,offset_t * length) const521 Status Tensor::GetItemPtr(uchar **ptr, const std::vector<dsize_t> &index, offset_t *length) const {
522   RETURN_UNEXPECTED_IF_NULL(ptr);
523   RETURN_UNEXPECTED_IF_NULL(length);
524   if (type_ == DataType::DE_STRING) {
525     if (data_ == nullptr) {
526       std::string err = "Data is not allocated yet";
527       RETURN_STATUS_UNEXPECTED(err);
528     }
529     dsize_t flat_idx;
530     RETURN_IF_NOT_OK(shape_.ToFlatIndex(index, &flat_idx));
531     offset_t length_temp = 0;
532     RETURN_IF_NOT_OK(GetStringAt(flat_idx, ptr, &length_temp));
533     *length = length_temp;
534     return Status::OK();
535   } else {
536     std::string err = "data type not compatible";
537     RETURN_STATUS_UNEXPECTED(err);
538   }
539 }
540 
// Compute the address of the element addressed by `ind`, which may index only
// a prefix of the dimensions. On success *start_addr_of_index points at that
// element and *remaining holds the shape of the trailing (unindexed)
// dimensions. String tensors are not supported.
Status Tensor::StartAddrOfIndex(std::vector<dsize_t> ind, uchar **start_addr_of_index, TensorShape *remaining) {
  RETURN_UNEXPECTED_IF_NULL(start_addr_of_index);
  RETURN_UNEXPECTED_IF_NULL(remaining);
  if (type() == DataType::DE_STRING) {
    RETURN_STATUS_UNEXPECTED("StartAddrOfIndex does not support string tensors yet.");
  }

  dsize_t flat_ind;
  std::vector<dsize_t> t_shape = shape().AsVector();
  // The dimensions not covered by `ind` form the remaining sub-tensor shape.
  std::vector<dsize_t> r(t_shape.begin() + ind.size(), t_shape.end());
  *remaining = TensorShape(r);
  ind.resize(this->Rank(), 0);  //  same as -> while (ind.size() < this->Rank()) ind.push_back(0);

  RETURN_IF_NOT_OK(shape_.ToFlatIndex(ind, &flat_ind));
  // check if GetBuffer() returns null, we should flag this as an error, this sanity check will only
  // be true if the tensor failed to allocate memory.
  if (GetMutableBuffer() == nullptr) {
    RETURN_STATUS_UNEXPECTED("Invalid GetBuffer in Tensor, got nullptr");
  }
  *start_addr_of_index = GetMutableBuffer() + flat_ind * this->type().SizeInBytes();
  return Status::OK();
}
563 
// Copy `tensor` into this tensor starting at index `ind`.
// With partial_insert, `ind` must be a single flat offset and the inserted
// tensor may cover less than the full remaining axis; otherwise the inserted
// tensor must exactly fill the sub-tensor at `ind`. All validation failures
// are accumulated into one error message before the copy is attempted.
Status Tensor::InsertTensor(const std::vector<dsize_t> &ind, const std::shared_ptr<Tensor> &tensor,
                            const bool partial_insert) {
  RETURN_UNEXPECTED_IF_NULL(tensor);
  std::string err_msg;
  if (partial_insert) {
    err_msg += (ind.size() != 1)
                 ? "[Tensor] only supports 1D insertion of elements not along the full length of the axis\n"
                 : "";
    err_msg +=
      (ind.at(0) + tensor->shape().NumOfElements() > shape().NumOfElements()) ? "[Tensor] incorrect index\n" : "";
  } else {
    err_msg += (ind.size() + tensor->Rank() != Rank()) ? "[Tensor] incorrect index\n" : "";
  }
  err_msg += (type() == DataType::DE_STRING) ? "[Tensor] Cannot insert into a tensor of type string\n" : "";
  err_msg += (!shape().known() || !tensor->shape().known()) ? "[Tensor] unknown shape\n" : "";

  err_msg += tensor->type().SizeInBytes() != type().SizeInBytes() ? "[Tensor] incorrect datatype\n" : "";
  uchar *start_addr_of_ind = nullptr;
  if (partial_insert) {
    TensorShape remaining_shape = tensor->shape();
    err_msg +=
      (!StartAddrOfIndex(ind, &start_addr_of_ind, &remaining_shape).IsOk()) ? "[Tensor] incorrect index\n" : "";
  } else {
    TensorShape remaining_shape = TensorShape::CreateUnknownRankShape();
    err_msg +=
      (!StartAddrOfIndex(ind, &start_addr_of_ind, &remaining_shape).IsOk()) ? "[Tensor] incorrect index\n" : "";
    // For a full insert the inserted tensor must match the remaining sub-shape exactly.
    err_msg += !(remaining_shape == tensor->shape()) ? "[Tensor] memory error\n" : "";
  }

  if (!err_msg.empty()) {
    MS_LOG(DEBUG) << "Insert tensor message: " << err_msg;
    RETURN_STATUS_UNEXPECTED(err_msg);
  } else {
    if (start_addr_of_ind != nullptr) {
      int ret_code =
        memcpy_s(start_addr_of_ind, tensor->SizeInBytes(), tensor->GetMutableBuffer(), tensor->SizeInBytes());
      if (ret_code == 0) {
        return Status::OK();
      } else {
        err_msg += "[Tensor] error in memcpy_s when inserting tensor\n";
        MS_LOG(DEBUG) << "Tensor message: " << err_msg;
        RETURN_STATUS_UNEXPECTED(err_msg);
      }
    } else {
      RETURN_STATUS_UNEXPECTED("Failed to create memory for Tensor.");
    }
  }
}
612 
ExpandDim(const dsize_t & axis)613 Status Tensor::ExpandDim(const dsize_t &axis) {
614   if (axis > Rank()) {
615     std::string err = "Axis is out of bound";
616     RETURN_STATUS_UNEXPECTED(err);
617   }
618   if (axis == Rank()) {
619     shape_ = shape_.AppendDim(1);
620   } else {
621     shape_ = shape_.InsertDim(axis, 1);
622   }
623   return Status::OK();
624 }
625 
Strides() const626 std::vector<dsize_t> Tensor::Strides() const {
627   std::vector<dsize_t> strides = shape_.Strides();
628   uint8_t size = type_.SizeInBytes();
629   (void)std::transform(strides.begin(), strides.end(), strides.begin(), [&size](const auto &c) { return c * size; });
630   return strides;
631 }
632 
633 #ifdef ENABLE_PYTHON
GetBufferInfo(Tensor * t,py::buffer_info * out)634 Status Tensor::GetBufferInfo(Tensor *t, py::buffer_info *out) {
635   RETURN_UNEXPECTED_IF_NULL(t);
636   RETURN_UNEXPECTED_IF_NULL(out);
637   CHECK_FAIL_RETURN_UNEXPECTED(t->type().IsNumeric(), "Cannot use GetBufferInfo on tensor of strings.");
638 
639   std::string format_desc = t->type().GetPybindFormat();
640   if (format_desc.empty()) {
641     RETURN_STATUS_UNEXPECTED("Cannot convert DE type tp pybind format");
642   }
643   *out = py::buffer_info(t->GetMutableBuffer(),   /* Pointer to buffer */
644                          t->type().SizeInBytes(), /* Size of one scalar */
645                          format_desc,             /* Python struct-style format descriptor */
646                          t->Rank(),               /* Number of dimensions */
647                          t->shape().AsVector(),   /* Buffer dimensions */
648                          t->Strides());
649   RETURN_UNEXPECTED_IF_NULL(out);
650   return Status::OK();
651 }
652 #endif
653 
to_json(nlohmann::json * out_json)654 Status Tensor::to_json(nlohmann::json *out_json) {
655   nlohmann::json args;
656   args["shape"] = shape_.AsVector();
657   args["type"] = type_.ToString();
658   if (type_ == DataType::DE_BOOL) {
659     RETURN_IF_NOT_OK(to_json_convert<bool>(&args));
660   } else if (type_ == DataType::DE_INT8) {
661     RETURN_IF_NOT_OK(to_json_convert<int8_t>(&args));
662   } else if (type_ == DataType::DE_INT16) {
663     RETURN_IF_NOT_OK(to_json_convert<int16_t>(&args));
664   } else if (type_ == DataType::DE_INT32) {
665     RETURN_IF_NOT_OK(to_json_convert<int32_t>(&args));
666   } else if (type_ == DataType::DE_INT64) {
667     RETURN_IF_NOT_OK(to_json_convert<int64_t>(&args));
668   } else if (type_ == DataType::DE_UINT8) {
669     RETURN_IF_NOT_OK(to_json_convert<uint8_t>(&args));
670   } else if (type_ == DataType::DE_UINT16) {
671     RETURN_IF_NOT_OK(to_json_convert<uint16_t>(&args));
672   } else if (type_ == DataType::DE_UINT32) {
673     RETURN_IF_NOT_OK(to_json_convert<uint32_t>(&args));
674   } else if (type_ == DataType::DE_UINT64) {
675     RETURN_IF_NOT_OK(to_json_convert<uint64_t>(&args));
676   } else if (type_ == DataType::DE_FLOAT32) {
677     RETURN_IF_NOT_OK(to_json_convert<float>(&args));
678   } else if (type_ == DataType::DE_FLOAT64) {
679     RETURN_IF_NOT_OK(to_json_convert<double>(&args));
680   } else if (type_ == DataType::DE_STRING) {
681     std::vector<std::string> data_out;
682     for (auto it = this->begin<std::string_view>(); it != this->end<std::string_view>(); it++) {
683       data_out.emplace_back(*it);
684     }
685     args["data"] = data_out;
686   } else {
687     return Status(StatusCode::kMDUnexpectedError, "Type is not supported for tensor");
688   }
689   *out_json = args;
690   return Status::OK();
691 }
692 
693 template <typename T>
to_json_convert(nlohmann::json * args)694 Status Tensor::to_json_convert(nlohmann::json *args) {
695   std::vector<T> data_out;
696   for (auto it = this->begin<T>(); it != this->end<T>(); it++) {
697     data_out.emplace_back(*it);
698   }
699   (*args)["data"] = data_out;
700   return Status::OK();
701 }
702 
from_json(nlohmann::json op_params,std::shared_ptr<Tensor> * tensor)703 Status Tensor::from_json(nlohmann::json op_params, std::shared_ptr<Tensor> *tensor) {
704   CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("shape") != op_params.end(), "Failed to find shape");
705   CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("type") != op_params.end(), "Failed to find type");
706   CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("data") != op_params.end(), "Failed to find data");
707   std::string type = op_params["type"];
708   std::vector<dsize_t> list = op_params["shape"];
709   TensorShape shape = TensorShape(list);
710   if (type == "bool") {
711     RETURN_IF_NOT_OK(from_json_convert<bool>(op_params["data"], shape, tensor));
712   } else if (type == "int8") {
713     RETURN_IF_NOT_OK(from_json_convert<int8_t>(op_params["data"], shape, tensor));
714   } else if (type == "int16") {
715     RETURN_IF_NOT_OK(from_json_convert<int16_t>(op_params["data"], shape, tensor));
716   } else if (type == "int32") {
717     RETURN_IF_NOT_OK(from_json_convert<int32_t>(op_params["data"], shape, tensor));
718   } else if (type == "int64") {
719     RETURN_IF_NOT_OK(from_json_convert<int64_t>(op_params["data"], shape, tensor));
720   } else if (type == "uint8") {
721     RETURN_IF_NOT_OK(from_json_convert<uint8_t>(op_params["data"], shape, tensor));
722   } else if (type == "uint16") {
723     RETURN_IF_NOT_OK(from_json_convert<uint16_t>(op_params["data"], shape, tensor));
724   } else if (type == "uint32") {
725     RETURN_IF_NOT_OK(from_json_convert<uint32_t>(op_params["data"], shape, tensor));
726   } else if (type == "uint64") {
727     RETURN_IF_NOT_OK(from_json_convert<uint64_t>(op_params["data"], shape, tensor));
728   } else if (type == "float32") {
729     RETURN_IF_NOT_OK(from_json_convert<float>(op_params["data"], shape, tensor));
730   } else if (type == "float64") {
731     RETURN_IF_NOT_OK(from_json_convert<double>(op_params["data"], shape, tensor));
732   } else if (type == "string") {
733     RETURN_IF_NOT_OK(from_json_convert<std::string>(op_params["data"], shape, tensor));
734   } else {
735     return Status(StatusCode::kMDUnexpectedError, "Type is not supported for tensor");
736   }
737   return Status::OK();
738 }
739 
740 template <typename T>
from_json_convert(nlohmann::json json_data,TensorShape shape,std::shared_ptr<Tensor> * tensor)741 Status Tensor::from_json_convert(nlohmann::json json_data, TensorShape shape, std::shared_ptr<Tensor> *tensor) {
742   std::vector<T> data = json_data;
743   RETURN_IF_NOT_OK(CreateFromVector(data, shape, tensor));
744   return Status::OK();
745 }
746 
747 template <typename T>
GetItemAt(T * o,const std::vector<dsize_t> & index) const748 Status Tensor::GetItemAt(T *o, const std::vector<dsize_t> &index) const {
749   RETURN_UNEXPECTED_IF_NULL(o);
750   if (data_ == nullptr) {
751     RETURN_STATUS_UNEXPECTED("Data is not allocated yet");
752   }
753   if (!type_.IsLooselyCompatible<T>()) {
754     std::string err = "Template type and Tensor type are not compatible";
755     RETURN_STATUS_UNEXPECTED(err);
756   }
757   if (type_.IsUnsignedInt()) {
758     RETURN_IF_NOT_OK(GetUnsignedIntAt<T>(o, index));
759   } else if (type_.IsSignedInt()) {
760     RETURN_IF_NOT_OK(GetSignedIntAt<T>(o, index));
761   } else if (type_.IsFloat()) {
762     RETURN_IF_NOT_OK(GetFloatAt<T>(o, index));
763   } else if (type_.IsBool()) {
764     bool *ptr = nullptr;
765     RETURN_IF_NOT_OK(GetItemPtr<bool>(&ptr, index));
766     *o = static_cast<T>(*ptr);
767   } else {
768     std::string err = "Tensor Type is unknown";
769     RETURN_STATUS_UNEXPECTED(err);
770   }
771   return Status::OK();
772 }
773 
GetItemAt(std::string_view * o,const std::vector<dsize_t> & index) const774 Status Tensor::GetItemAt(std::string_view *o, const std::vector<dsize_t> &index) const {
775   RETURN_UNEXPECTED_IF_NULL(data_);
776   RETURN_UNEXPECTED_IF_NULL(o);
777   CHECK_FAIL_RETURN_UNEXPECTED(type_ == DataType::DE_STRING, "Tensor type is not a string");
778 
779   uchar *start = nullptr;
780   offset_t length = 0;
781   RETURN_IF_NOT_OK(GetItemPtr(&start, index, &length));
782   std::string_view sv{reinterpret_cast<const char *>(start)};
783   o->swap(sv);
784   return Status::OK();
785 }
786 
787 #ifdef ENABLE_PYTHON
788 // return data as numpy, should return status
GetDataAsNumpy(py::array * data)789 Status Tensor::GetDataAsNumpy(py::array *data) {
790   RETURN_UNEXPECTED_IF_NULL(data);
791   if (type_ == DataType::DE_BOOL) {
792     *data = py::array_t<bool>(shape_.AsVector(), reinterpret_cast<bool *>(data_));
793   } else if (type_ == DataType::DE_INT8) {
794     *data = py::array_t<int8_t>(shape_.AsVector(), reinterpret_cast<int8_t *>(data_));
795   } else if (type_ == DataType::DE_INT16) {
796     *data = py::array_t<int16_t>(shape_.AsVector(), reinterpret_cast<int16_t *>(data_));
797   } else if (type_ == DataType::DE_INT32) {
798     *data = py::array_t<int32_t>(shape_.AsVector(), reinterpret_cast<int32_t *>(data_));
799   } else if (type_ == DataType::DE_INT64) {
800     *data = py::array_t<int64_t>(shape_.AsVector(), reinterpret_cast<int64_t *>(data_));
801   } else if (type_ == DataType::DE_UINT8) {
802     *data = py::array_t<uint8_t>(shape_.AsVector(), reinterpret_cast<uint8_t *>(data_));
803   } else if (type_ == DataType::DE_UINT16) {
804     *data = py::array_t<uint16_t>(shape_.AsVector(), reinterpret_cast<uint16_t *>(data_));
805   } else if (type_ == DataType::DE_UINT32) {
806     *data = py::array_t<uint32_t>(shape_.AsVector(), reinterpret_cast<uint32_t *>(data_));
807   } else if (type_ == DataType::DE_UINT64) {
808     *data = py::array_t<uint64_t>(shape_.AsVector(), reinterpret_cast<uint64_t *>(data_));
809   } else if (type_ == DataType::DE_FLOAT16) {
810     *data = py::array_t<float16>(shape_.AsVector(), reinterpret_cast<float16 *>(data_));
811   } else if (type_ == DataType::DE_FLOAT32) {
812     *data = py::array_t<float>(shape_.AsVector(), reinterpret_cast<float *>(data_));
813   } else if (type_ == DataType::DE_FLOAT64) {
814     *data = py::array_t<double>(shape_.AsVector(), reinterpret_cast<double *>(data_));
815   } else if (type_ == DataType::DE_STRING) {
816     RETURN_IF_NOT_OK(GetDataAsNumpyStrings(data));
817   } else {
818     RETURN_STATUS_UNEXPECTED("Got unexpected type when returning numpy");
819   }
820   return Status::OK();
821 }
GetDataAsNumpyStrings(py::array * data)822 Status Tensor::GetDataAsNumpyStrings(py::array *data) {
823   RETURN_UNEXPECTED_IF_NULL(data);
824   auto itr = begin<std::string_view>();
825   uint64_t max_value = 0;
826   for (; itr != end<std::string_view>(); ++itr) {
827 #if defined(__APPLE__)
828     max_value = fmax((*itr).length(), max_value);
829 #else
830     max_value = std::max((*itr).length(), max_value);
831 #endif
832   }
833   // if all strings are empty, numpy stores a byte for each string |S1
834   max_value = (max_value == 0 ? 1 : max_value);
835   uint64_t total_size = shape_.NumOfElements() * max_value;
836   char *tmp_data = reinterpret_cast<char *>(data_allocator_->allocate(total_size));
837   if (tmp_data == nullptr) {
838     RETURN_STATUS_UNEXPECTED("Cannot create temp array.");
839   }
840   int ret_code = memset_s(tmp_data, total_size, 0, total_size);
841   CHECK_FAIL_RETURN_UNEXPECTED(ret_code == 0, "Failed to initialize temp memory");
842 
843   itr = begin<std::string_view>();
844   uint64_t i = 0;
845   for (; itr != end<std::string_view>(); itr++, i++) {
846     if (!(*itr).empty()) {
847       ret_code = memcpy_s(tmp_data + i * max_value, total_size, (*itr).data(), (*itr).length());
848       CHECK_FAIL_RETURN_UNEXPECTED(ret_code == 0, "Failed to copy string data.");
849     }
850   }
851   auto strides = shape_.Strides();
852   (void)std::transform(strides.begin(), strides.end(), strides.begin(),
853                        [&max_value](const auto &s) { return s * max_value; });
854   *data = py::array(py::dtype("S" + std::to_string(max_value)), shape_.AsVector(), strides, tmp_data);
855   RETURN_UNEXPECTED_IF_NULL(data);
856   data_allocator_->deallocate(reinterpret_cast<uchar *>(tmp_data));
857   return Status::OK();
858 }
859 #endif
860 
// Squeezes the shape in place by delegating to TensorShape::Squeeze();
// only the shape metadata changes — the data buffer is untouched.
void Tensor::Squeeze() { shape_ = shape_.Squeeze(); }
862 
863 template <typename T>
GetUnsignedIntAt(T * o,const std::vector<dsize_t> & index) const864 Status Tensor::GetUnsignedIntAt(T *o, const std::vector<dsize_t> &index) const {
865   RETURN_UNEXPECTED_IF_NULL(o);
866   if (data_ == nullptr) {
867     RETURN_STATUS_UNEXPECTED("Data is not allocated yet");
868   }
869   if (!type_.IsLooselyCompatible<T>()) {
870     std::string err = "Template type and Tensor type are not compatible";
871     RETURN_STATUS_UNEXPECTED(err);
872   }
873   switch (type_.value()) {
874     case DataType::DE_UINT8: {
875       uint8_t *ptr = nullptr;
876       RETURN_IF_NOT_OK(GetItemPtr<uint8_t>(&ptr, index));
877       *o = static_cast<T>(*ptr);
878       break;
879     }
880     case DataType::DE_UINT16: {
881       uint16_t *ptr = nullptr;
882       RETURN_IF_NOT_OK(GetItemPtr<uint16_t>(&ptr, index));
883       *o = static_cast<T>(*ptr);
884       break;
885     }
886     case DataType::DE_UINT32: {
887       uint32_t *ptr = nullptr;
888       RETURN_IF_NOT_OK(GetItemPtr<uint32_t>(&ptr, index));
889       *o = static_cast<T>(*ptr);
890       break;
891     }
892     case DataType::DE_UINT64: {
893       uint64_t *ptr = nullptr;
894       RETURN_IF_NOT_OK(GetItemPtr<uint64_t>(&ptr, index));
895       *o = static_cast<T>(*ptr);
896       break;
897     }
898     default:
899       std::string err = "Tensor Type is not an unsigned Integer";
900       RETURN_STATUS_UNEXPECTED(err);
901   }
902   return Status::OK();
903 }
904 
905 template <typename T>
GetSignedIntAt(T * o,const std::vector<dsize_t> & index) const906 Status Tensor::GetSignedIntAt(T *o, const std::vector<dsize_t> &index) const {
907   RETURN_UNEXPECTED_IF_NULL(o);
908   if (data_ == nullptr) {
909     RETURN_STATUS_UNEXPECTED("Data is not allocated yet");
910   }
911   if (!type_.IsLooselyCompatible<T>()) {
912     std::string err = "Template type and Tensor type are not compatible";
913     RETURN_STATUS_UNEXPECTED(err);
914   }
915   switch (type_.value()) {
916     case DataType::DE_INT8: {
917       int8_t *ptr = nullptr;
918       RETURN_IF_NOT_OK(GetItemPtr<int8_t>(&ptr, index));
919       *o = static_cast<T>(*ptr);
920       break;
921     }
922     case DataType::DE_INT16: {
923       int16_t *ptr = nullptr;
924       RETURN_IF_NOT_OK(GetItemPtr<int16_t>(&ptr, index));
925       *o = static_cast<T>(*ptr);
926       break;
927     }
928     case DataType::DE_INT32: {
929       int32_t *ptr = nullptr;
930       RETURN_IF_NOT_OK(GetItemPtr<int32_t>(&ptr, index));
931       *o = static_cast<T>(*ptr);
932       break;
933     }
934     case DataType::DE_INT64: {
935       int64_t *ptr = nullptr;
936       RETURN_IF_NOT_OK(GetItemPtr<int64_t>(&ptr, index));
937       *o = static_cast<T>(*ptr);
938       break;
939     }
940     default:
941       std::string err = "Tensor Type is not a signed Integer";
942       RETURN_STATUS_UNEXPECTED(err);
943   }
944   return Status::OK();
945 }
946 
947 template <typename T>
GetFloatAt(T * o,const std::vector<dsize_t> & index) const948 Status Tensor::GetFloatAt(T *o, const std::vector<dsize_t> &index) const {
949   RETURN_UNEXPECTED_IF_NULL(o);
950   if (data_ == nullptr) {
951     RETURN_STATUS_UNEXPECTED("Data is not allocated yet");
952   }
953   if (!type_.IsLooselyCompatible<T>()) {
954     std::string err = "Template type and Tensor type are not compatible";
955     RETURN_STATUS_UNEXPECTED(err);
956   }
957   switch (type_.value()) {
958     case DataType::DE_FLOAT16: {
959       float16 *ptr = nullptr;
960       RETURN_IF_NOT_OK(GetItemPtr<float16>(&ptr, index));
961       *o = static_cast<T>(*ptr);
962       break;
963     }
964     case DataType::DE_FLOAT32: {
965       float *ptr = nullptr;
966       RETURN_IF_NOT_OK(GetItemPtr<float>(&ptr, index));
967       *o = static_cast<T>(*ptr);
968       break;
969     }
970     case DataType::DE_FLOAT64: {
971       double *ptr = nullptr;
972       RETURN_IF_NOT_OK(GetItemPtr<double>(&ptr, index));
973       *o = static_cast<T>(*ptr);
974       break;
975     }
976     default:
977       std::string err = "Tensor Type is not a float/double";
978       RETURN_STATUS_UNEXPECTED(err);
979   }
980   return Status::OK();
981 }
// Returns a pointer to the raw bytes of the string at flat position `index`,
// plus its length in bytes (excluding the trailing '\0').
// Layout implied by the arithmetic below: the buffer begins with a table of
// offset_t offsets; entry i points at string i's NUL-terminated payload, and
// entry i+1 marks one past its terminator.
Status Tensor::GetStringAt(dsize_t index, uchar **string_start, offset_t *length) const {
  CHECK_FAIL_RETURN_UNEXPECTED(type_ == DataType::DE_STRING, "Type is not string");
  RETURN_UNEXPECTED_IF_NULL(data_);
  RETURN_UNEXPECTED_IF_NULL(string_start);
  RETURN_UNEXPECTED_IF_NULL(length);
  auto *offset_ptr = reinterpret_cast<offset_t *>(data_);  // offsets starts here
  offset_t start = offset_ptr[index];
  *string_start = data_ + start;
  *length = offset_ptr[index + 1] - start - 1;  // -1 to skip the \0 from the string length
  return Status::OK();
}
// Copies one row along the innermost (last) dimension from `src` into this
// tensor at the same multi-dimensional position `index`. The last component
// of `index` must be 0 (i.e. `index` addresses the start of a row), and both
// tensors must share the same element type.
Status Tensor::CopyLastDimAt(const std::shared_ptr<Tensor> &src, const std::vector<dsize_t> &index) {
  RETURN_UNEXPECTED_IF_NULL(src);
  CHECK_FAIL_RETURN_UNEXPECTED(src->type() == type_, "Source Tensor has a different type");
  CHECK_FAIL_RETURN_UNEXPECTED(index.back() == 0, "Last dim in index should be 0");

  uint8_t type_size = type_.SizeInBytes();
  // Copy only as many elements as the shorter of the two rows holds.
  size_t len = std::min(src->shape()[-1], shape_[-1]) * type_size;
  dsize_t src_flat_ind = 0, dst_flat_ind = 0;
  // Translate the same index through each tensor's own shape to get the
  // flat element offset on both sides.
  RETURN_IF_NOT_OK(src->shape().ToFlatIndex(index, &src_flat_ind));
  RETURN_IF_NOT_OK(shape_.ToFlatIndex(index, &dst_flat_ind));

  const unsigned char *src_addr = src->GetBuffer() + src_flat_ind * type_size;
  unsigned char *dst_addr = GetMutableBuffer() + dst_flat_ind * type_size;
  CHECK_FAIL_RETURN_UNEXPECTED(memcpy_s(dst_addr, len, src_addr, len) == 0, "memcpy error");
  return Status::OK();
}
1009 
GetSliceOption(const SliceOption & slice_option,const int32_t & slice_index,SliceOption * slice_option_ptr)1010 Status Tensor::GetSliceOption(const SliceOption &slice_option, const int32_t &slice_index,
1011                               SliceOption *slice_option_ptr) {
1012   RETURN_UNEXPECTED_IF_NULL(slice_option_ptr);
1013   if (slice_option.indices_.empty() && !slice_option.slice_.valid()) {
1014     RETURN_STATUS_UNEXPECTED("Both indices and slices can not be empty.");
1015   }
1016 
1017   if (!slice_option.indices_.empty() && slice_option.slice_.valid()) {
1018     RETURN_STATUS_UNEXPECTED("Both indices and slices can not be given.");
1019   }
1020 
1021   CHECK_FAIL_RETURN_UNEXPECTED(shape_.Size() > slice_index, "Invalid shape, should greater than slices index.");
1022   // if slice object was provided, indices should be empty. Generate indices from the slice object.
1023   if (slice_option.indices_.empty()) {
1024     // check if slice is valid
1025     mindspore::dataset::Slice slice_copy = slice_option.slice_;
1026     slice_copy.start_ = HandleNeg(slice_option.slice_.start_, shape_[slice_index]);
1027     slice_copy.stop_ = HandleNeg(slice_option.slice_.stop_, shape_[slice_index]);
1028     slice_copy.start_ = slice_copy.start_ < 0 ? 0 : slice_copy.start_;
1029     slice_copy.stop_ = slice_copy.stop_ < 0 ? 0 : slice_copy.stop_;
1030     dsize_t max_idx = shape_[slice_index];
1031     slice_copy.start_ = slice_copy.start_ > max_idx ? max_idx : slice_copy.start_;
1032     slice_copy.stop_ = slice_copy.stop_ > max_idx ? max_idx : slice_copy.stop_;
1033     *slice_option_ptr = SliceOption(slice_copy);
1034   } else {
1035     // indices validation
1036     std::vector<dsize_t> indices_copy;
1037     for (int j = 0; j < slice_option.indices_.size(); j++) {
1038       dsize_t index = HandleNeg(slice_option.indices_[j], shape_[slice_index]);
1039       CHECK_FAIL_RETURN_UNEXPECTED(index < shape_[slice_index] && index >= 0,
1040                                    "Index " + std::to_string(index) + " is out of bounds.");
1041       indices_copy.emplace_back(index);
1042     }
1043     *slice_option_ptr = SliceOption(indices_copy);
1044   }
1045   return Status::OK();
1046 }
1047 
// Extracts a sub-tensor selected by `slice_options_` into *out.
// Each SliceOption selects, for its dimension, either everything (all_),
// a start/stop/step range (slice_), or an explicit list of indices (indices_).
// Dimensions not covered by an option are taken in full.
Status Tensor::Slice(std::shared_ptr<Tensor> *out, const std::vector<SliceOption> slice_options_) {
  RETURN_UNEXPECTED_IF_NULL(out);
  std::vector<SliceOption> converted_slice_objects;

  CHECK_FAIL_RETURN_UNEXPECTED(slice_options_.size() <= static_cast<size_t>(std::numeric_limits<dsize_t>::max()),
                               "The size of slice_options_ must not be more than \"INT64_MAX\".");
  for (size_t k = 0; k < slice_options_.size(); k++) {
    SliceOption slice_option = slice_options_[k];

    // all_ means: take the full extent of dimension k.
    if (slice_option.all_) {
      mindspore::dataset::Slice slice = mindspore::dataset::Slice(shape_[static_cast<dsize_t>(k)]);
      converted_slice_objects.push_back(SliceOption(slice));
      continue;
    }

    CHECK_FAIL_RETURN_UNEXPECTED(k <= static_cast<size_t>(std::numeric_limits<int32_t>::max()),
                                 "GetSliceOption() can't function properly if there are "
                                 "more than \"INT32_MAX\" slice options");
    SliceOption slice_option_item(false);
    // Normalize negative bounds/indices and validate against dimension k.
    RETURN_IF_NOT_OK(GetSliceOption(slice_option, static_cast<int32_t>(k), &slice_option_item));
    converted_slice_objects.emplace_back(slice_option_item);
  }

  // partial slices, pass in the rest
  if (slice_options_.size() != Rank()) {
    for (dsize_t j = static_cast<dsize_t>(slice_options_.size()); j < Rank(); j++) {
      mindspore::dataset::Slice slice = mindspore::dataset::Slice(0, shape_[j]);
      converted_slice_objects.emplace_back(SliceOption(slice));
    }
  }

  // determine final shape:
  TensorShape t = TensorShape({});
  dsize_t slice_len = slice_options_.size();
  dsize_t slice_len_ind;
  for (int i = 0; i < shape_.Rank(); i++) {
    if (i < slice_len) {
      // if it's a slice
      if (converted_slice_objects[i].indices_.size() == 0 && converted_slice_objects[i].slice_.step_ != 0) {
        // Output length of a range = ceil((stop - start) / step).
        slice_len_ind = (converted_slice_objects[i].slice_.stop_ - converted_slice_objects[i].slice_.start_) /
                        converted_slice_objects[i].slice_.step_;
        if ((converted_slice_objects[i].slice_.stop_ - converted_slice_objects[i].slice_.start_) %
              converted_slice_objects[i].slice_.step_ !=
            0) {
          slice_len_ind++;
        }
        // account for slices that would return no data
        slice_len_ind = slice_len_ind < 0 ? 0 : slice_len_ind;
        t = t.AppendDim(slice_len_ind);
      } else {
        // if its a vector of indices
        // need to introduce a way of handling indices and slices
        if (converted_slice_objects[i].indices_.size() >= 1) {
          t = t.AppendDim(converted_slice_objects[i].indices_.size());
        }
      }
    } else {
      // add in the rest of the dimensions
      slice_len_ind = shape_[i];
      t = t.AppendDim(slice_len_ind);
    }
  }

  // Expand the normalized options into the explicit element coordinates to copy.
  std::vector<std::vector<dsize_t>> indices_vector = IndexGenerator(converted_slice_objects);

  if (indices_vector.empty()) {
    return CreateEmpty(t, type_, out);
  }
  // Numeric data is copied byte-wise; string data goes through the offset table.
  if (type_.IsNumeric()) {
    return SliceNumeric(out, indices_vector, t);
  } else {
    return SliceString(out, indices_vector, t);
  }
}
1122 
// Copies the elements addressed by `indices` out of this numeric tensor into a
// freshly allocated tensor of shape `shape`. Runs of source elements that turn
// out to be contiguous in memory are coalesced into a single memcpy.
Status Tensor::SliceNumeric(std::shared_ptr<Tensor> *out, const std::vector<std::vector<dsize_t>> &indices,
                            const TensorShape &shape) {
  RETURN_UNEXPECTED_IF_NULL(out);
  RETURN_IF_NOT_OK(CreateEmpty(shape, type_, out));

  RETURN_UNEXPECTED_IF_NULL(out);
  (*out)->GetMutableBuffer();
  dsize_t out_index = 0;
  std::vector<dsize_t> dim_length = shape_.AsVector();
  dsize_t type_size = type_.SizeInBytes();
  // Flat offset of the start of the current contiguous run in the source.
  std::vector<dsize_t> src_start = HandleNegIndices(indices[0], dim_length);
  dsize_t src_start_index;
  RETURN_IF_NOT_OK(shape_.ToFlatIndex(src_start, &src_start_index));

  uchar *dst_addr = (*out)->data_;
  dsize_t count = 1;  // number of consecutive index entries merged into this run

  // to handle partial slices
  // Each index entry addresses a block of `current_stride` elements (the
  // product of the dimensions it leaves unspecified).
  dsize_t current_stride = shape_.Strides()[indices[0].size() - 1];
  dsize_t indices_size = static_cast<dsize_t>(indices.size());
  for (dsize_t i = 0; i < indices_size; i++) {
    std::vector<dsize_t> cur_index = HandleNegIndices(indices[i], dim_length);
    if (i < indices_size - 1) {
      std::vector<dsize_t> next_index = HandleNegIndices(indices[i + 1], dim_length);
      dsize_t flat_idx_curr;
      dsize_t flat_idx_next;

      RETURN_IF_NOT_OK(shape_.ToFlatIndex(cur_index, &flat_idx_curr));
      RETURN_IF_NOT_OK(shape_.ToFlatIndex(next_index, &flat_idx_next));

      // Next block is adjacent to the current one: extend the run and defer the copy.
      if (flat_idx_next == flat_idx_curr + current_stride) {
        count++;
        continue;
      }
    }

    // Flush the accumulated run with one memcpy.
    // NOTE(review): the dest-max argument is the whole output size, not the
    // space remaining at dst_addr + out_index — looks looser than intended; verify.
    int return_code = memcpy_s(dst_addr + out_index * type_size, (*out)->SizeInBytes(),
                               data_ + src_start_index * type_size, count * type_size * current_stride);
    CHECK_FAIL_RETURN_UNEXPECTED(return_code == 0, "memcpy_s failed in SliceNumeric");
    out_index += count * current_stride;
    if (i < indices_size - 1) {
      src_start = HandleNegIndices(indices[i + 1], dim_length);  // next index
      RETURN_IF_NOT_OK(shape_.ToFlatIndex(src_start, &src_start_index));
    }
    count = 1;
  }
  return Status::OK();
}
SliceString(std::shared_ptr<Tensor> * out,const std::vector<std::vector<dsize_t>> & indices,const TensorShape & shape)1171 Status Tensor::SliceString(std::shared_ptr<Tensor> *out, const std::vector<std::vector<dsize_t>> &indices,
1172                            const TensorShape &shape) {
1173   RETURN_UNEXPECTED_IF_NULL(out);
1174   std::vector<dsize_t> dim_length = shape_.AsVector();
1175   std::vector<std::string> strings;
1176 
1177   for (std::vector<dsize_t> index : indices) {
1178     std::vector<dsize_t> cur_index = HandleNegIndices(index, dim_length);
1179     dsize_t cur_flat_index;
1180     RETURN_IF_NOT_OK(shape_.ToFlatIndex(cur_index, &cur_flat_index));
1181     std::string_view sv;
1182     RETURN_IF_NOT_OK(GetItemAt(&sv, {cur_index}));
1183     strings.emplace_back(sv);
1184   }
1185   return CreateFromVector(strings, shape, out);
1186 }
CreateFromMSTensor(const MSTensor & in,TensorPtr * out)1187 Status Tensor::CreateFromMSTensor(const MSTensor &in, TensorPtr *out) {
1188   if (in.Data().get() == nullptr) {
1189     *out = nullptr;
1190     return Status::OK();
1191   }
1192   return Tensor::CreateFromMemory(TensorShape(in.Shape()), MSTypeToDEType(static_cast<TypeId>(in.DataType())),
1193                                   (const uchar *)(in.Data().get()), in.DataSize(), out);
1194 }
1195 
1196 }  // namespace dataset
1197 }  // namespace mindspore
1198