• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020-2024 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_H_
17 #define MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_H_
18 
19 #include <algorithm>
20 #include <memory>
21 #include <string>
22 #include <utility>
23 #include <vector>
24 #if defined(_WIN32) || defined(_WIN64)
25 #undef HAVE_STDDEF_H
26 #undef HAVE_STDLIB_H
27 #endif
28 
29 #include "securec.h"
30 #ifndef ENABLE_ANDROID
31 #include "proto/example.pb.h"
32 #endif
33 #ifdef ENABLE_PYTHON
34 #include "pybind11/numpy.h"
35 #include "pybind11/pybind11.h"
36 #include "pybind11/stl.h"
37 #endif
38 
39 #include "minddata/dataset/core/data_type.h"
40 #include "minddata/dataset/core/de_tensor.h"
41 #include "minddata/dataset/core/tensor_helpers.h"
42 #include "minddata/dataset/core/tensor_shape.h"
43 #include "minddata/dataset/include/dataset/constants.h"
44 #include "minddata/dataset/util/log_adapter.h"
45 #include "minddata/dataset/util/status.h"
46 #include "utils/ms_utils.h"
47 
48 #ifdef ENABLE_PYTHON
49 namespace py = pybind11;
50 #endif
51 
52 namespace mindspore::dataset {
53 class Tensor;
54 template <typename T>
55 class Allocator;
56 
57 using offset_t = uint32_t;  // type of offset values to store strings locations
58 using TensorPtr = std::shared_ptr<Tensor>;
59 
60 /// const of the size of the offset variable
61 constexpr uint8_t kOffsetSize = sizeof(offset_t);
62 
63 class DATASET_API Tensor {
64  public:
65   Tensor() = delete;
66   Tensor(const Tensor &other) = delete;
67   Tensor &operator=(const Tensor &other) = delete;
68 
69   /// Create a tensor using shape and type. This constructor should not be used directly, use CreateFromTensor instead.
70   /// \note The shape and type information should be known and valid
71   /// \note The constructor does not allocate data
72   /// \param shape TensorShape
73   /// \param type DataType
74   Tensor(TensorShape shape, DataType type);
75 
76   /// Move constructor
77   /// \param other Tensor to be moved
78   Tensor(Tensor &&other) noexcept;
79 
80   /// Move assignment operator
81   /// \param other Tensor to be moved
82   Tensor &operator=(Tensor &&other) noexcept;
83 
84   /// Create a numeric tensor with type and shape. Items of the tensor would be uninitialized.
85   /// \param[in] shape shape of the output tensor
86   /// \param[in] type type of the output tensor
87   /// \param[out] out Generated tensor
88   /// \return Status code
89   static Status CreateEmpty(const TensorShape &shape, const DataType &type, TensorPtr *out);
90 
91   /// Create a numeric tensor from a pointer in memory. Length of the source data is determined from the shape and type.
92   /// Data will be copied into the new created tensor.
93   /// \param[in] shape shape of the output tensor
94   /// \param[in] type type of the output tensor
95   /// \param[in] src pointer to the source data
96   /// \param[out] out Generated tensor
97   /// \return Status code
98   static Status CreateFromMemory(const TensorShape &shape, const DataType &type, const uchar *src, TensorPtr *out);
99 
100   /// Create a tensor from a pointer in memory and length. Data will be copied into the new created tensor.
101   /// \param[in] shape shape of the output tensor
102   /// \param[in] type type of the output tensor
103   /// \param[in] src pointer to the source data
104   /// \param[in] length length of the src data
105   /// \param[out] out Generated tensor
106   /// \return Status code
107   static Status CreateFromMemory(const TensorShape &shape, const DataType &type, const uchar *src,
108                                  const dsize_t &length, TensorPtr *out);
109 
110   /// Create a copy of the input tensor
111   /// \param[in] in original tensor to be copied
112   /// \param[out] out output tensor to be generated
113   /// \return Status
CreateFromTensor(const TensorPtr & in,TensorPtr * out)114   static Status CreateFromTensor(const TensorPtr &in, TensorPtr *out) {
115     return CreateFromMemory(in->shape(), in->type(), in->GetBuffer(), in->SizeInBytes(), out);
116   }
117 
118   /// Create a copy of the input tensor
119   /// \param[in] in MSTensor to create DETensor from.
120   /// \param[in] out DETensor created.
121   /// \return Status
122   static Status CreateFromMSTensor(const MSTensor &in, TensorPtr *out);
123 
124 #ifdef ENABLE_PYTHON
125   /// Create a Tensor from a given py::array and reuse the memory of numpy
126   /// \param[in] arr py::array
127   /// \param[out] out Created tensor
128   /// \return Status Code
129   static Status CreateFromNpArray(py::array arr, TensorPtr *out);
130 
131   /// Helper function to create a tensor from a Python dictionary object
132   /// \param[in] obj pybind11 wrapper for Python dictionary object
133   /// \param[out] out Created Tensor
134   /// \return Status
135   static Status CreateFromPythonObject(py::object obj, TensorPtr *out);
136 #endif
137 
138 #ifndef ENABLE_ANDROID
139   /// Create a tensor of type DE_STRING from a BytesList.
140   /// \param[in] bytes_list protobuf's Bytelist
141   /// \param[in] shape shape of the output tensor
142   /// \param[out] out created Tensor
143   /// \return Status Code
144   static Status CreateFromByteList(const dataengine::BytesList &bytes_list, const TensorShape &shape, TensorPtr *out);
145 
146   /// Create a tensor of type UINT8 or INT8 from a BytesList.
147   /// The tensor will be padded with ' ' to reach the required pad_size.
148   /// \param[in] bytes_list protobuf's Bytelist
149   /// \param[in] shape shape of the output tensor
150   /// \param[in] type type of created tensor. Should be DE_UINT8 or INT8
151   /// \param[in] pad_size The size of the tensor after padding
152   /// \param[out] out created Tensor
153   /// \return Status Code
154   static Status CreateFromByteList(const dataengine::BytesList &bytes_list, const TensorShape &shape,
155                                    const DataType &type, dsize_t pad_size, TensorPtr *out);
156 #endif
157 
158   /// Create a Tensor from a given list of values.
159   /// \param[in] items elements of the tensor
160   /// \param[in] shape shape of the output tensor
161   /// \param[out] out output argument to hold the created Tensor
162   /// \return Status Code
163   template <typename T>
CreateFromVector(const std::vector<T> & items,const TensorShape & shape,TensorPtr * out)164   static Status CreateFromVector(const std::vector<T> &items, const TensorShape &shape, TensorPtr *out) {
165     CHECK_FAIL_RETURN_UNEXPECTED(
166       static_cast<dsize_t>(items.size()) == shape.NumOfElements(),
167       "Number of elements in the vector does not match the number of elements of the shape required");
168     const DataType type = DataType::FromCType<T>();
169     // if items is empty, items_ptr would be nullptr. CreateFromMemory will handle this case.
170     const auto items_ptr = reinterpret_cast<const uchar *>(&items[0]);
171     return CreateFromMemory(shape, type, items_ptr, out);
172   }
173 
174   /// Create a 1D Tensor from a given list of values.
175   /// \param[in] items elements of the tensor
176   /// \param[out] out output argument to hold the created Tensor
177   /// \return Status Code
178   template <typename T>
CreateFromVector(const std::vector<T> & items,TensorPtr * out)179   static Status CreateFromVector(const std::vector<T> &items, TensorPtr *out) {
180     return CreateFromVector(items, TensorShape({static_cast<dsize_t>(items.size())}), out);
181   }
182 
183   /// Create a 1D boolean Tensor from a given list of boolean values.
184   /// \param[in] items elements of the tensor
185   /// \param[in] shape shape of the output tensor
186   /// \param[out] out output argument to hold the created Tensor
187   /// \return Status Code
CreateFromVector(const std::vector<bool> & items,const TensorShape & shape,TensorPtr * out)188   static Status CreateFromVector(const std::vector<bool> &items, const TensorShape &shape, TensorPtr *out) {
189     const std::vector<uint8_t> temp(items.begin(), items.end());
190     RETURN_IF_NOT_OK(CreateFromVector(temp, shape, out));
191     (*out)->type_ = DataType(DataType::DE_BOOL);
192     return Status::OK();
193   }
194 
195   /// Create a Tensor from a given list of strings.
196   /// @note: The memory layout of a Tensor of strings consists of the Offset_array followed by the strings.
197   /// The offset array will store one extra value to find the length of the last string.
198   /// OFFSET_1, OFFSET_2, ..., OFFSET_n+1, STRING_1, STRING_2, ..., STRING_n
199   /// The value of each offset is the start index of the corresponding string
200   /// Offsets is of type offset_t
201   /// strings will ne null-terminated
202   /// example: Tensor(['abc', 'de'], shape={2}, type=DE_STRING)
203   /// |------------------------------------------------------------------------|
204   /// |             OFFSET ARRAY                  |            STRINGS         |
205   /// |  bytes 0-3   |   bytes 4-7   | bytes 8-11 | bytes 12-15 | bytes 16-18  |
206   /// |      12      |      16       |     19     |     abc\0   |      de\0    |
207   /// |------------------------------------------------------------------------|
208   /// | first offset | second offset | end offset | first value | second value |
209   /// |------------------------------------------------------------------------|
210   /// \param[in] items elements of the tensor
211   /// \param[in] shape shape of the output tensor
212   /// \param[in] type data type of the output tensor, can only be DE_STRING or DE_BYTES
213   /// \param[out] out output argument to hold the created Tensor
214   /// \return Status Code
CreateFromVector(const std::vector<std::string> & items,const TensorShape & shape,const DataType & type,TensorPtr * out)215   static Status CreateFromVector(const std::vector<std::string> &items, const TensorShape &shape, const DataType &type,
216                                  TensorPtr *out) {
217     RETURN_UNEXPECTED_IF_NULL(out);
218     CHECK_FAIL_RETURN_UNEXPECTED(static_cast<dsize_t>(items.size()) == shape.NumOfElements(),
219                                  "The number of elements in the vector: " + std::to_string(items.size()) +
220                                    " does not match the number of elements: " + std::to_string(shape.NumOfElements()) +
221                                    " the shape required.");
222     CHECK_FAIL_RETURN_UNEXPECTED(type.IsString(), "Can not create a numeric Tensor from a string vector.");
223     *out = std::make_shared<Tensor>(TensorShape({static_cast<dsize_t>(items.size())}), type);
224     CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory failed.");
225     if (items.empty()) {
226       if (shape.known()) {
227         return (*out)->Reshape(shape);
228       }
229     }
230     auto length_sum = [](size_t sum, const std::string &s) { return s.length() + sum; };
231     const dsize_t total_length = std::accumulate(items.begin(), items.end(), 0, length_sum);
232 
233     // total bytes needed = offset array + strings
234     // offset array needs to store one offset var per element + 1 extra to get the length of the last string.
235     // strings will be null-terminated --> need 1 extra byte per element
236     const size_t num_bytes = (kOffsetSize + 1) * (*out)->shape_.NumOfElements() + kOffsetSize + total_length;
237 
238     RETURN_IF_NOT_OK((*out)->AllocateBuffer(num_bytes));
239     auto offset_arr = reinterpret_cast<offset_t *>((*out)->data_);
240     const uchar *buf = (*out)->GetStringsBuffer();
241 
242     offset_t offset = buf - (*out)->data_;  // the first string will start here
243     uint32_t i = 0;
244     for (const auto &str : items) {
245       //  insert the start index of the string.
246       offset_arr[i++] = offset;
247       // insert actual string
248       const int ret_code =
249         memcpy_s((*out)->data_ + offset, num_bytes - offset, common::SafeCStr(str), str.length() + 1);
250       if (ret_code != 0) {
251         MS_LOG(ERROR) << "Cannot copy string into Tensor";
252       }
253       //  next string will be stored right after the current one.
254       offset = offset + str.length() + 1;
255     }
256     // store one more offset value so we can get the length of the last string
257     offset_arr[i] = offset;
258 
259     (*out)->data_end_ = (*out)->data_ + offset_arr[i];
260 
261     MS_ASSERT(num_bytes - offset == 0);
262     if (shape.known()) {
263       RETURN_IF_NOT_OK((*out)->Reshape(shape));
264     }
265     return Status::OK();
266   }
267 
268   // Create a string Tensor from a string vector by default.
CreateFromVector(const std::vector<std::string> & items,const TensorShape & shape,TensorPtr * out)269   static Status CreateFromVector(const std::vector<std::string> &items, const TensorShape &shape, TensorPtr *out) {
270     return CreateFromVector(items, shape, DataType(DataType::DE_STRING), out);
271   }
272 
273   /// Create a numeric scalar Tensor from the given value.
274   /// \tparam T type of value
275   /// \param[in] item value
276   /// \param[out] out Created tensor
277   /// \return Status code
278   template <typename T>
CreateScalar(const T & item,TensorPtr * out)279   static Status CreateScalar(const T &item, TensorPtr *out) {
280     const DataType type = DataType::FromCType<T>();
281     const auto item_ptr = reinterpret_cast<const uchar *>(&item);
282     return CreateFromMemory(TensorShape::CreateScalar(), type, item_ptr, out);
283   }
284 
285   /// Create a tensor from a binary file on disk.
286   /// \param[in] path file to be read
287   /// \param[out] out Created Tensor
288   /// \return Status code
289   static Status CreateFromFile(const std::string &path, TensorPtr *out);
290 
291   /// Destruct the tensor and release the memory using the allocator
292   virtual ~Tensor();
293 
294   /// Equality operator. compares tensor shape, type and data
295   /// \param[in] rhs Tensor to be compared with
296   /// \return bool
297   bool operator==(const Tensor &rhs) const;
298 
299   bool operator!=(const Tensor &rhs) const { return !((*this) == rhs); }
300 
301   Status to_json(nlohmann::json *out_json);
302 
303   template <typename T>
304   Status to_json_convert(nlohmann::json *args);
305 
306   static Status from_json(nlohmann::json op_params, std::shared_ptr<Tensor> *tensor);
307 
308   template <typename T>
309   static Status from_json_convert(const nlohmann::json &json_data, const TensorShape &shape,
310                                   std::shared_ptr<Tensor> *tensor);
311 
312   static Status from_json_convert(const nlohmann::json &json_data, const TensorShape &shape, const DataType &type,
313                                   std::shared_ptr<Tensor> *tensor);
314 
315   /// Get item located at `index`, caller needs to provide the type.
316   /// \tparam T
317   /// \param[in] index vector<dsize_t>
318   /// \return return the item specified at index
319   template <typename T>
320   Status GetItemAt(T *o, const std::vector<dsize_t> &index) const;
321 
322   /// Get string located at `index`.
323   /// \param[in] index vector<dsize_t>
324   /// \return return std::string_view specified at index
325   Status GetItemAt(std::string_view *o, const std::vector<dsize_t> &index) const;
326 
327   template <typename T>
328   Status GetUnsignedIntAt(T *o, const std::vector<dsize_t> &index) const;
329 
330   template <typename T>
331   Status GetSignedIntAt(T *o, const std::vector<dsize_t> &index) const;
332 
333   template <typename T>
334   Status GetFloatAt(T *o, const std::vector<dsize_t> &index) const;
335 
336   /// set item at location specified by index
337   /// \param[in] index
338   /// \param[in] value of type `T`
339   template <typename T>
SetItemAt(const std::vector<dsize_t> & index,const T & value)340   Status SetItemAt(const std::vector<dsize_t> &index, const T &value) {
341     T *ptr = nullptr;
342     RETURN_IF_NOT_OK(GetItemPtr<T>(&ptr, index));
343     *ptr = value;
344     return Status::OK();
345   }
346 
347   /// set string item at location specified by index
348   /// \param[in] index
349   /// \param[in] value of type std::string
SetItemAt(const std::vector<dsize_t> & index,const std::string & value)350   Status SetItemAt(const std::vector<dsize_t> &index, const std::string &value) {
351     RETURN_UNEXPECTED_IF_NULL(data_);
352     uchar *ptr = nullptr;
353     offset_t length = 0;
354     RETURN_IF_NOT_OK(GetItemPtr(&ptr, index, &length));
355     if (value.length() != length) {
356       RETURN_STATUS_UNEXPECTED("Length of the new string does not match the item.");
357     }
358     const int ret_code = memcpy_s(reinterpret_cast<char *>(ptr), length, value.c_str(), length);
359     CHECK_FAIL_RETURN_UNEXPECTED(ret_code == 0, "Failed to set data into tensor.");
360 
361     return Status::OK();
362   }
363 
364   /// Fill tensor with zeros. Does not support string or bytes.
Zero()365   Status Zero() {
366     CHECK_FAIL_RETURN_UNEXPECTED(!type_.IsString(), "Can not fill zeros on tensor of type string or bytes.");
367     dsize_t size = SizeInBytes();
368     CHECK_FAIL_RETURN_UNEXPECTED(memset_sp(GetMutableBuffer(), size, 0, size) == 0,
369                                  "Failed to fill tensor with zeroes.");
370     return Status::OK();
371   }
372 
373   /// Fill all elements in the Tensor with the given value of type `T`. Does not support string or bytes.
374   /// \tparam T
375   /// \param value[in]
376   template <typename T>
Fill(const T & value)377   Status Fill(const T &value) {
378     CHECK_FAIL_RETURN_UNEXPECTED(!type_.IsString(), "Can not fill on tensor of type string or bytes.");
379     const int64_t cellSize = type_.SizeInBytes();
380     if ((data_ != nullptr) && type_.IsCompatible<T>()) {
381       for (dsize_t i = 0; i < Size(); i++) {
382         CHECK_FAIL_RETURN_UNEXPECTED(memcpy_s((data_ + i * cellSize), cellSize, &value, cellSize) == 0, "memcpy err");
383       }
384       return Status::OK();
385     } else {
386       std::string err;
387       err += (data_ == nullptr) ? "data_ is nullptr \t" : "";
388       err += type_.IsCompatible<T>() ? "data type not compatible\t" : "";
389       return {StatusCode::kMDUnexpectedError, err};
390     }
391   }
392 
393   /// Getter function for shape
394   /// \return
shape()395   const TensorShape &shape() const { return shape_; }
396 
397   /// Check if tensor has data
398   /// \return bool - true if tensor is not empty
HasData()399   bool HasData() const { return data_ != nullptr; }
400 
401   /// Check if tensor is complex
402   /// \return bool - true if tensor is complex
IsComplex()403   bool IsComplex() const {
404     if (shape_.empty()) {
405       return false;
406     }
407     // check the last dim all be 2
408     return shape_[-1] == 2;
409   }
410 
411   /// Reshape the tensor. The given shape should have the same number of elements in the Tensor
412   /// \param shape
413   virtual Status Reshape(const TensorShape &shape);
414 
415   /// \return number of elements in this tensor
Size()416   dsize_t Size() const { return shape().NumOfElements(); }
417 
418   /// \return the number of bytes this tensor is needs
SizeInBytes()419   dsize_t SizeInBytes() const {
420     if (data_end_ == nullptr) {
421       return type_.SizeInBytes() * shape_.NumOfElements();
422     }
423     return data_end_ - data_;
424   }
425 
426   /// Get the exact length of string / bytes
GetStringLength(uint32_t * length)427   Status GetStringLength(uint32_t *length) const {
428     CHECK_FAIL_RETURN_UNEXPECTED(type().IsString(), "Only support to get the length of string or bytes Tensor.");
429     *length = data_end_ - data_ - (Size() + 1) * kOffsetSize - Size();
430     return Status::OK();
431   }
432 
433   /// \return the rank of the tensor
Rank()434   dsize_t Rank() const { return shape().Rank(); }
435 
436   /// Get the starting memory address as a constant for the data of the tensor.  This potentially
437   /// drives an allocation if the data area.
438   /// \return const unsigned char*
GetBuffer()439   const uchar *GetBuffer() const { return data_; }
440 
441   /// Getter of the type
442   /// \return
type()443   DataType type() const { return type_; }
444 
445   /// Provide stream operator for displaying the Tensor.
446   /// \param out Output stream.
447   /// \param tensor Tensor object to be printed.
448   /// \return Output stream.
449   friend std::ostream &operator<<(std::ostream &out, const Tensor &tensor) {
450     tensor.Print(out);
451     return out;
452   }
453 
454   /// Invalidate this Tensor by setting the type and shape to unknown and MData to null.
455   /// Calling this method will make the Tensor and its data inaccessible, use it with caution.
456   void Invalidate();
457 
458   /// Copy input tensor into self at the location index.
459   /// Index is a vector of axes which can be incomplete:
460   /// Ex: shape <2,3>, inserting into index {0} will replace the first row. index {1,2} will replace the last cell.
461   /// \param index
462   /// \param input
463   /// \param partial_insert: boolean to determine if insertion along the full axis is enforced
464   /// \return Status code
465   Status InsertTensor(const std::vector<dsize_t> &index, const std::shared_ptr<Tensor> &input,
466                       bool partial_insert = false);
467 
468   /// Find the address of the given index. Used in InsertTensor.
469   /// Example:
470   ///      Tensor t= [[1,2],[3,4]] , StartAddrOfIndex({0}) -> &1
471   /// \param[in] ind Element index.
472   /// \param[out] start_addr_of_index Starting address of the element index.
473   /// \param[out] remaining Remaining shape from the index.
474   /// \return Status code.
475   Status StartAddrOfIndex(std::vector<dsize_t> ind, uchar **start_addr_of_index, TensorShape *remaining);
476 
477   /// Expand the shape of the Tensor with one extra dimension.
478   /// For example, if the shape is <512,512,3>:
479   ///     *- ExpandDim(0) gives: <1,512,512,3>
480   ///     *- ExpandDim(1) gives: <512,1,512,3>
481   ///     *- ExpandDim(3) gives: <512,512,3,1>
482   /// \param axis location of the dim
483   virtual Status ExpandDim(const dsize_t &axis);
484 
485   virtual void Squeeze();
486 
487   /// Calculates the strides of the Tensor
488   /// Ex: Tensor of shape <4,2,2> and type DE_UINT8 (1 byte)
489   /// The strides will be {6,2,1}.
490   /// Ex: Tensor of shape <4,2,2> and type DE_UINT32 (4 byte)
491   /// The strides will be {24,8,4}.
492   /// \return vector of integers
493   std::vector<dsize_t> Strides() const;
494 
ToString()495   std::string ToString() const {
496     std::stringstream ss;
497     this->Print(ss);
498     return ss.str();
499   }
500 
501   /// Handle negative indices.
502   /// \param[in] index Index to be handled.
503   /// \param[in] length Axis length of this index.
504   /// \return Handled index.
HandleNeg(dsize_t index,dsize_t length)505   static inline dsize_t HandleNeg(dsize_t index, dsize_t length) { return (index < 0) ? (index + length) : index; }
506 
507   /// Handle negative indices.
508   /// \param[in] index_vector Vector of indices.
509   /// \param[in] length Length of each axis.
510   /// \return Modified vector of indices.
HandleNegIndices(const std::vector<dsize_t> & index_vector,const std::vector<dsize_t> & length)511   static inline std::vector<dsize_t> HandleNegIndices(const std::vector<dsize_t> &index_vector,
512                                                       const std::vector<dsize_t> &length) {
513     if (length.size() < index_vector.size()) {
514       MS_LOG(ERROR) << "The size of length should be greater than the shape of index_vector";
515       return {};
516     }
517     std::vector<dsize_t> indices(index_vector.size(), 0);
518     for (size_t i = 0; i < index_vector.size(); i++) {
519       indices[i] = HandleNeg(index_vector[i], length[i]);
520     }
521     return indices;
522   }
523 
524   /// Slice tensor bases on the given indices. Copy the sliced data into out tensor.
525   /// Based on the type of tensor, SliceNumeric or SliceString will be called
526   /// \param[out] out Tensor
527   /// \param[in] slice_options vector of SliceOption objects
528   /// \return Status error code
529   Status Slice(TensorPtr *out, const std::vector<mindspore::dataset::SliceOption> &slice_options);
530 
531   /// Get slice_option according to shape and index.
532   /// \param[in] slice_option input SliceOption object
533   /// \param[in] slice_index index of SliceOption object
534   /// \param[out] output slice_option with shape info
535   /// \return Status error code
536   Status GetSliceOption(const SliceOption &slice_option, const int32_t &slice_index, SliceOption *slice_option_ptr);
537 
538 #ifdef ENABLE_PYTHON
539   /// Constructs numpy array from input tensor
540   /// \param[out] data this data is the location of python data
541   /// \return Status code
542   Status GetDataAsNumpy(py::array *data);
543 
544   /// Constructs numpy array of string or bytes
545   /// \param[out] data this data is the location of python data
546   /// \return Status code
547   Status GetDataAsNumpyStrings(py::array *data);
548 
549   template <typename T>
GetDataAsNumpyStrings(py::array * data)550   Status GetDataAsNumpyStrings(py::array *data) {
551     RETURN_UNEXPECTED_IF_NULL(data);
552     if (Size() == 0) {
553       // NumPy will create empty array in type of float64 by default. So we must define the data type.
554       *data = py::array(type_.AsNumpyType(), shape_.AsVector(), nullptr);
555     } else {
556       std::vector<T> string_vector;
557       string_vector.reserve(Size());
558       // Iterate over tensor and create a vector of string_views of strings in the tensor.
559       (void)std::transform(begin<std::string_view>(), end<std::string_view>(), std::back_inserter(string_vector),
560                            [](const auto &element) { return static_cast<std::string>(element); });
561       *data = py::array(py::cast(string_vector));
562       data->resize(shape_.AsVector());
563     }
564     return Status::OK();
565   }
566 
567   static Status GetBufferInfo(Tensor *t, py::buffer_info *out);
568 
569   /// Returns the Python dictionary stored in the tensor
570   /// \param[out] data this data is the location of Python data (pybind11 wrapper)
571   /// \return Status code
572   Status GetDataAsPythonObject(py::dict *data);
573 
574 #endif
575 
SetYuvShape(const uint32_t & width,const uint32_t & widthStride,const uint32_t & height,const uint32_t & heightStride)576   Status SetYuvShape(const uint32_t &width, const uint32_t &widthStride, const uint32_t &height,
577                      const uint32_t &heightStride) {
578     const std::vector<uint32_t> tmp{width, widthStride, height, heightStride};
579     yuv_shape_ = tmp;
580     return Status::OK();
581   }
582 
GetYuvShape()583   std::vector<uint32_t> GetYuvShape() { return yuv_shape_; }
584 
585   /// TensorIterator is a linear iterator that can be used to iterate over the elements of the Tensor
586   /// The order  elements  is as the memory layout (i.e., row-major) [[1,2,3],[4,5,6] --> 1,2,3,4,5,6
587   /// \tparam T type of values in the Tensor Iterator
588   template <typename T, bool = true>
589   class TensorIterator {
590    public:
591     using iterator_category = std::random_access_iterator_tag;
592     using value_type = T;
593     using difference_type = ptrdiff_t;
594     using pointer = T *;
595     using reference = T &;
596 
ptr_(reinterpret_cast<T * > (ptr))597     explicit TensorIterator(uchar *ptr = nullptr) : ptr_(reinterpret_cast<T *>(ptr)) {}
598 
TensorIterator(const TensorIterator<T> & raw_iterator)599     TensorIterator(const TensorIterator<T> &raw_iterator) { ptr_ = raw_iterator.ptr_; }
600 
601     ~TensorIterator() = default;
602 
603     TensorIterator<T> &operator=(const TensorIterator<T> &rhs) {
604       if (this == &rhs) {
605         return *this;
606       }
607       ptr_ = rhs.ptr_;
608       return *this;
609     }
610 
611     TensorIterator<T> &operator=(T *rhs) {
612       ptr_ = rhs;
613       return *this;
614     }
615 
616     bool operator==(const TensorIterator<T> &rhs) const { return ptr_ == rhs.ptr_; }
617 
618     bool operator!=(const TensorIterator<T> &rhs) const { return !(*this == rhs); }
619 
620     operator bool() const { return ptr_ != nullptr; }
621 
622     T &operator*() { return *ptr_; }
623 
624     const T &operator*() const { return *ptr_; }
625 
626     T *operator->() { return ptr_; }
627 
628     TensorIterator<T> &operator+=(const ptrdiff_t &inc) {
629       ptr_ += inc;
630       return *this;
631     }
632 
633     TensorIterator<T> &operator-=(const ptrdiff_t &inc) {
634       ptr_ -= inc;
635       return *this;
636     }
637 
638     TensorIterator<T> &operator++() {
639       ++ptr_;
640       return *this;
641     }
642 
643     TensorIterator<T> &operator--() {
644       --ptr_;
645       return *this;
646     }
647 
648     TensorIterator<T> operator++(int) {
649       auto temp(*this);
650       ++ptr_;
651       return temp;
652     }
653 
654     TensorIterator<T> operator--(int) {
655       auto temp(*this);
656       --ptr_;
657       return temp;
658     }
659 
660     TensorIterator<T> operator+(const ptrdiff_t &inc) {
661       auto temp(*this);
662       temp.ptr_ += inc;
663       return temp;
664     }
665 
666     TensorIterator<T> operator-(const ptrdiff_t &inc) {
667       auto temp(*this);
668       temp.ptr_ -= inc;
669       return temp;
670     }
671 
672    protected:
673     T *ptr_;
674   };
675 
676   // Specialization of TensorIterator for strings. It returns std::string_view for every item.
677   // \tparam DUMMY, used to mbe able to specialize the inner class
678   template <bool DUMMY>
679   class TensorIterator<std::string_view, DUMMY> {
680    public:
681     using iterator_category = std::random_access_iterator_tag;
682     using value_type = std::string_view;
683     using difference_type = ptrdiff_t;
684     using pointer = std::string_view *;
685     using reference = std::string_view &;
686 
687     explicit TensorIterator(const uchar *data = nullptr, dsize_t index = 0) {
688       data_ = reinterpret_cast<const char *>(data);
689       index_ = index;
690     }
691 
TensorIterator(const TensorIterator<std::string_view,DUMMY> & raw_iterator)692     TensorIterator(const TensorIterator<std::string_view, DUMMY> &raw_iterator) {
693       data_ = raw_iterator.data_;
694       index_ = raw_iterator.index_;
695     }
696 
697     ~TensorIterator() = default;
698 
699     bool operator==(const TensorIterator<std::string_view> &rhs) const {
700       return data_ == rhs.data_ && index_ == rhs.index_;
701     }
702 
703     bool operator!=(const TensorIterator<std::string_view> &rhs) { return !(*this == rhs); }
704 
705     operator bool() const { return data_ != nullptr; }
706 
707     std::string_view operator*() const {
708       const auto offset_ = reinterpret_cast<const offset_t *>(data_);
709       const offset_t start = offset_[index_];
710       const offset_t end = offset_[index_ + 1];
711       return std::string_view{data_ + start, end - start - 1};  // -1 to skip the \0 at the end
712     }
713 
714     TensorIterator<std::string_view> &operator+=(const dsize_t &inc) {
715       index_ += inc;
716       return *this;
717     }
718 
719     TensorIterator<std::string_view> &operator-=(const dsize_t &inc) {
720       index_ -= inc;
721       return *this;
722     }
723 
724     TensorIterator<std::string_view> &operator++() {
725       ++index_;
726       return *this;
727     }
728 
729     TensorIterator<std::string_view> &operator--() {
730       --index_;
731       return *this;
732     }
733 
734     TensorIterator<std::string_view> operator++(int) {
735       auto temp(*this);
736       ++index_;
737       return temp;
738     }
739 
740     TensorIterator<std::string_view> operator--(int) {
741       auto temp(*this);
742       --index_;
743       return temp;
744     }
745 
746     TensorIterator<std::string_view> operator+(const dsize_t &inc) {
747       auto temp(*this);
748       temp.index_ += inc;
749       return temp;
750     }
751 
752     TensorIterator<std::string_view> operator-(const dsize_t &inc) {
753       auto temp(*this);
754       temp.index_ -= inc;
755       return temp;
756     }
757 
758    protected:
759     dsize_t index_;
760     const char *data_;
761   };
762 
763   /// Return a TensorIterator that points to the start of the Tensor.
764   /// It's the user responsibility to use the correct type that matches the Tensor type
765   /// \tparam T The type of values in the Tensor
766   /// \return TensorIterator
767   template <typename T>
begin()768   TensorIterator<T> begin() {
769     return TensorIterator<T>(data_);
770   }
771 
772   /// Return a linear iterator that points to the place after the last element of the Tensor.
773   /// \tparam T The type of values in the Tensor
774   /// \return TensorIterator
775   template <typename T>
end()776   TensorIterator<T> end() {
777     return TensorIterator<T>(data_end_);
778   }
779 
780   /// Copies the last dimension at `index` from Tensor `src` to this Tensor.
781   /// \param[in] src Tensor
782   /// \param[in] index vector to the start of the dimension. The last dim should be 0
783   /// \return Status
784   Status CopyLastDimAt(const std::shared_ptr<Tensor> &src, const std::vector<dsize_t> &index);
785 
  /// Get the starting memory address for the data of the tensor.
  /// NOTE(review): the previous comment claimed this "potentially drives an allocation
  /// if the data is null", but this accessor only returns the current pointer and never
  /// allocates — it may return nullptr if no buffer has been allocated yet.
  /// \return unsigned char* pointer to the first byte of the tensor's data
  uchar *GetMutableBuffer() { return data_; }
790 
791   /// Skip the offsets and returns the start of the buffer where the real strings is stored. Caller needs to check if
792   /// the tensor's type is a string, otherwise undefined address would be returned.
793   /// \return return the address of the first string of the tensor.
GetStringsBuffer()794   uchar *GetStringsBuffer() const { return data_ + kOffsetSize * shape_.NumOfElements() + kOffsetSize; }
795 
796  protected:
797   /// Allocate memory for the tensor using the data_allocator
798   /// \param[in] length number of bytes to be allocated
799   /// \return Error Status
800   Status AllocateBuffer(const dsize_t &length);
801 
802   /// A function that prints Tensor recursively, first called by print
803   /// \param[in] out
804   /// \param[in] cur_dim
805   /// \param[in] cur_index
806   void PrintRecursive(std::ostream &out, int32_t cur_dim, const std::vector<dsize_t> &cur_index) const;
807 
808   /// Print the info and data of tensor.
809   /// \param[out] out Output stream.
810   void Print(std::ostream &out) const;
811 
812   /// Print the data of tensor.
813   /// \param[out] out Output stream.
814   void PrintData(std::ostream &out) const;
815 
816   /// A function that print the value as specified by its index
817   /// \param[in] index vector representing the index
818   /// \param[out] out
819   void PrintItemAt(const std::vector<dsize_t> &index, std::ostream &out) const;
820 
821   /// Get pointer to item located at `index`, caller needs to provide the type.
822   /// \tparam T
823   /// \param[in] index vector<dsize_t>
824   /// \return return a pointer to the item specified at index of type `T`
825   template <typename T>
826   Status GetItemPtr(T **ptr, const std::vector<dsize_t> &index) const;
827 
828   /// Get pointer to string located at `index` and the length of string
829   /// \param[in] index vector<dsize_t>
830   /// \return return a pointer to the string specified at index and the length of the string
831   Status GetItemPtr(uchar **ptr, const std::vector<dsize_t> &index, offset_t *length = nullptr) const;
832 
833   /// Given a flat index of an item string, return the start and length of the item.
834   /// \param[in] index Flat index of the item.
  /// \param[out] string_start Starting address of the string.
836   /// \param[out] length Length of the string.
837   /// \return Status code.
838   Status GetStringAt(dsize_t index, uchar **string_start, offset_t *length) const;
839 
GetAllocator()840   static const std::unique_ptr<Allocator<unsigned char>> &GetAllocator() {
841     static auto allocator = std::make_unique<Allocator<unsigned char>>(GlobalContext::Instance()->mem_pool());
842     return allocator;
843   }
844 
845   /// all access to shape_ should be via shape
846   TensorShape shape_;
847   /// data type of tensor
848   DataType type_;
849   /// pointer to the start of the physical data
850   unsigned char *data_;
851   /// pointer to the end of the physical data
852   unsigned char *data_end_ = nullptr;
853 
854   /// shape for interpretation of YUV image
855   std::vector<uint32_t> yuv_shape_;
856 
857 #ifdef ENABLE_PYTHON
858   /// Store python dictionary wrapper
859   py::object python_dict_;
860   std::string python_dict_as_str_;
861 
862   /// Hold the np.ndarray which is from python layer without memcpy cost
863   py::buffer python_array_;
864 #endif
865 
866  private:
867   friend class DETensor;
868 
869   /// Slice numeric tensors.
870   Status SliceNumeric(TensorPtr *out, const std::vector<std::vector<dsize_t>> &indices, const TensorShape &shape);
871 
872   /// Slice string tensors
873   Status SliceString(TensorPtr *out, const std::vector<std::vector<dsize_t>> &indices, const TensorShape &shape);
874 
875   /// Copy raw data of a array based on shape and strides to the destination pointer
876   /// \param dst [out] Pointer to the destination array where the content is to be copied
877   /// \param[in] src Pointer to the source of strided array to be copied
878   /// \param[in] shape shape of the source array
879   /// \param[in] strides strides of the source array
880   /// \param[in] type_size number of bytes needed to store one array element's type
881   /// \return Status Code
882   static Status CopyStridedArray(unsigned char *dst, unsigned char *src, std::vector<dsize_t> shape,
883                                  std::vector<dsize_t> strides, uint8_t type_size);
884 
885 #ifdef ENABLE_PYTHON
886   /// Helper function to create a tensor from Numpy array of strings
887   /// \param[in] arr Numpy array
888   /// \param[out] out Created Tensor
889   /// \return Status
890   static Status CreateFromNpString(py::array arr, TensorPtr *out);
891 #endif
892 };
893 
894 template <>
895 inline Tensor::TensorIterator<std::string_view> Tensor::end<std::string_view>() {
896   return TensorIterator<std::string_view>(data_, shape_.NumOfElements());
897 }
898 
/// Create a string scalar Tensor from the given value.
/// Delegates to CreateFromVector with a one-element vector, a scalar shape,
/// and the DE_STRING data type.
/// \param[in] item The string value to store.
/// \param[out] out Created tensor.
/// \return Status code.
template <>
inline Status Tensor::CreateScalar<std::string>(const std::string &item, TensorPtr *out) {
  RETURN_UNEXPECTED_IF_NULL(out);
  return CreateFromVector({item}, TensorShape::CreateScalar(), DataType(DataType::DE_STRING), out);
}
908 }  // namespace mindspore::dataset
909 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_H_
910