• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2019 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_H_
17 #define MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_H_
18 
#include <cstdint>
#include <deque>
#include <memory>
#include <numeric>
#include <sstream>
#include <string>
#include <string_view>
#include <vector>
23 #include "./securec.h"
24 #ifndef ENABLE_ANDROID
25 #include "utils/log_adapter.h"
26 #else
27 #include "mindspore/lite/src/common/log_adapter.h"
28 #endif
29 #if defined(_WIN32) || defined(_WIN64)
30 #undef HAVE_STDDEF_H
31 #undef HAVE_STDLIB_H
32 #endif
33 
34 #ifdef ENABLE_PYTHON
35 #include "pybind11/numpy.h"
36 #include "pybind11/pybind11.h"
37 #include "pybind11/stl.h"
38 #endif
39 
40 #include "minddata/dataset/include/dataset/constants.h"
41 #include "minddata/dataset/core/data_type.h"
42 #include "minddata/dataset/core/tensor_helpers.h"
43 #include "minddata/dataset/core/tensor_shape.h"
44 #include "minddata/dataset/core/de_tensor.h"
45 #include "minddata/dataset/util/status.h"
46 #include "utils/ms_utils.h"
47 #ifndef ENABLE_ANDROID
48 #include "proto/example.pb.h"
49 #endif
50 
51 #ifdef ENABLE_PYTHON
52 namespace py = pybind11;
53 #endif
54 namespace mindspore {
55 namespace dataset {
56 class Tensor;
57 template <typename T>
58 class Allocator;
59 
60 using CharAllocPtr = std::unique_ptr<Allocator<unsigned char>>;
61 using TensorAllocPtr = std::shared_ptr<Allocator<Tensor>>;  // An allocator shared_ptr for Tensors
62 using offset_t = uint32_t;                                  // type of offset values to store strings locations
63 using TensorPtr = std::shared_ptr<Tensor>;
64 
65 class Tensor {
66  public:
67   Tensor() = delete;
68   Tensor(const Tensor &other) = delete;
69   Tensor &operator=(const Tensor &other) = delete;
70 
71   /// Create a tensor using shape and type. This constructor should not be used directly, use CreateFromTensor instead.
72   /// \note The shape and type information should be known and valid
73   /// \note The constructor does not allocate data
74   /// \param shape TensorShape
75   /// \param type DataType
76   Tensor(const TensorShape &shape, const DataType &type);
77 
78   /// Move constructor
79   /// \param other Tensor to be moved
80   Tensor(Tensor &&other) noexcept;
81 
82   /// Move assignment operator
83   /// \param other Tensor to be moved
84   Tensor &operator=(Tensor &&other) noexcept;
85 
86   /// Create a numeric tensor with type and shape. Items of the tensor would be uninitialized.
87   /// \param[in] shape shape of the output tensor
88   /// \param[in] type type of the output tensor
89   /// \param[out] out Generated tensor
90   /// \return Status code
91   static Status CreateEmpty(const TensorShape &shape, const DataType &type, TensorPtr *out);
92 
93   /// Create a numeric tensor from a pointer in memory. Length of the source data is determined from the shape and type.
94   /// Data will be copied into the new created tensor.
95   /// \param[in] shape shape of the output tensor
96   /// \param[in] type type of the output tensor
97   /// \param[in] src pointer to the source data
98   /// \param[out] out Generated tensor
99   /// \return Status code
100   static Status CreateFromMemory(const TensorShape &shape, const DataType &type, const uchar *src, TensorPtr *out);
101 
102   /// Create a tensor from a pointer in memory and length. Data will be copied into the new created tensor.
103   /// \param[in] shape shape of the output tensor
104   /// \param[in] type type of the output tensor
105   /// \param[in] src pointer to the source data
106   /// \param[in] length length of the src data
107   /// \param[out] out Generated tensor
108   /// \return Status code
109   static Status CreateFromMemory(const TensorShape &shape, const DataType &type, const uchar *src,
110                                  const dsize_t &length, TensorPtr *out);
111 
112   /// Create a copy of the input tensor
113   /// \param[in] in original tensor to be copied
114   /// \param[out] out output tensor to be generated
115   /// \return Status
CreateFromTensor(const TensorPtr & in,TensorPtr * out)116   static Status CreateFromTensor(const TensorPtr &in, TensorPtr *out) {
117     return CreateFromMemory(in->shape(), in->type(), in->GetBuffer(), in->SizeInBytes(), out);
118   }
119 
120   /// Create a copy of the input tensor
121   /// \param[in] MSTensor to create DETensorFrom
122   /// \return Status
123   static Status CreateFromMSTensor(const MSTensor &in, TensorPtr *out);
124 
125 #ifdef ENABLE_PYTHON
126   /// Create a Tensor from a given py::array
127   /// \param[in] arr py::array
128   /// \param[out] out Created tensor
129   /// \return Status Code
130   static Status CreateFromNpArray(const py::array &arr, TensorPtr *out);
131 #endif
132 
133 #ifndef ENABLE_ANDROID
134   /// Create a tensor of type DE_STRING from a BytesList.
135   /// \param[in] bytes_list protobuf's Bytelist
136   /// \param[in] shape shape of the output tensor
137   /// \param[out] out created Tensor
138   /// \return Status Code
139   static Status CreateFromByteList(const dataengine::BytesList &bytes_list, const TensorShape &shape, TensorPtr *out);
140 
141   /// Create a tensor of type UINT8 or INT8 from a BytesList.
142   /// The tensor will be padded with ' ' to reach the required pad_size.
143   /// \param[in] bytes_list protobuf's Bytelist
144   /// \param[in] shape shape of the output tensor
145   /// \param[in] type type of created tensor. Should be DE_UINT8 or INT8
146   /// \param[in] pad_size The size of the tensor after padding
147   /// \param[out] out created Tensor
148   /// \return Status Code
149   static Status CreateFromByteList(const dataengine::BytesList &bytes_list, const TensorShape &shape,
150                                    const DataType &type, dsize_t pad_size, TensorPtr *out);
151 #endif
152 
153   /// Create a Tensor from a given list of values.
154   /// \tparam type of the values to be inserted.
155   /// \param[in] items elements of the tensor
156   /// \param[in] shape shape of the output tensor
157   /// \param[out] out output argument to hold the created Tensor
158   /// \return Status Code
159   template <typename T>
CreateFromVector(const std::vector<T> & items,const TensorShape & shape,TensorPtr * out)160   static Status CreateFromVector(const std::vector<T> &items, const TensorShape &shape, TensorPtr *out) {
161     CHECK_FAIL_RETURN_UNEXPECTED(
162       static_cast<dsize_t>(items.size()) == shape.NumOfElements(),
163       "Number of elements in the vector does not match the number of elements of the shape required");
164     DataType type = DataType::FromCType<T>();
165     // if items is empty, items_ptr would be nullptr. CreateFromMemory will handle this case.
166     auto items_ptr = reinterpret_cast<const uchar *>(&items[0]);
167     return CreateFromMemory(shape, type, items_ptr, out);
168   }
169 
170   /// Create a 1D Tensor from a given list of values.
171   /// \tparam type of the values to be inserted.
172   /// \param[in] items elements of the tensor
173   /// \param[out] out output argument to hold the created Tensor
174   /// \return Status Code
175   template <typename T>
CreateFromVector(const std::vector<T> & items,TensorPtr * out)176   static Status CreateFromVector(const std::vector<T> &items, TensorPtr *out) {
177     return CreateFromVector(items, TensorShape({static_cast<dsize_t>(items.size())}), out);
178   }
179 
180   /// Create a 1D boolean Tensor from a given list of boolean values.
181   /// \param[in] items elements of the tensor
182   /// \param[in] shape shape of the output tensor
183   /// \param[out] out output argument to hold the created Tensor
184   /// \return Status Code
CreateFromVector(const std::vector<bool> & items,const TensorShape & shape,TensorPtr * out)185   static Status CreateFromVector(const std::vector<bool> &items, const TensorShape &shape, TensorPtr *out) {
186     std::vector<uint8_t> temp(items.begin(), items.end());
187     RETURN_IF_NOT_OK(CreateFromVector(temp, shape, out));
188     (*out)->type_ = DataType(DataType::DE_BOOL);
189     return Status::OK();
190   }
191 
192   /// Create a numeric scalar Tensor from the given value.
193   /// \tparam T type of value
194   /// \param[in] item value
195   /// \param[out] out Created tensor
196   /// \return Status code
197   template <typename T>
CreateScalar(const T & item,TensorPtr * out)198   static Status CreateScalar(const T &item, TensorPtr *out) {
199     DataType type = DataType::FromCType<T>();
200     auto item_ptr = reinterpret_cast<const uchar *>(&item);
201     return CreateFromMemory(TensorShape::CreateScalar(), type, item_ptr, out);
202   }
203 
204   /// Create a tensor from a binary file on disk.
205   /// \param[in] path file to be read
206   /// \param[out] out Created Tensor
207   /// \return Status code
208   static Status CreateFromFile(const std::string &path, TensorPtr *out);
209 
210   /// Destruct the tensor and release the memory using the allocator
211   virtual ~Tensor();
212 
213   /// Equality operator. compares tensor shape, type and data
214   /// \param[in] rhs Tensor to be compared with
215   /// \return bool
216   bool operator==(const Tensor &rhs) const;
217 
218   bool operator!=(const Tensor &rhs) const { return !((*this) == rhs); }
219 
220   Status to_json(nlohmann::json *out_json);
221 
222   template <typename T>
223   Status to_json_convert(nlohmann::json *args);
224 
225   static Status from_json(nlohmann::json op_params, std::shared_ptr<Tensor> *tensor);
226 
227   template <typename T>
228   static Status from_json_convert(nlohmann::json json_data, TensorShape shape, std::shared_ptr<Tensor> *tensor);
229 
230   /// Get item located at `index`, caller needs to provide the type.
231   /// \tparam T
232   /// \param[in] index vector<dsize_t>
233   /// \return return the item specified at index
234   template <typename T>
235   Status GetItemAt(T *o, const std::vector<dsize_t> &index) const;
236 
237   /// Get string located at `index`.
238   /// \param[in] index vector<dsize_t>
239   /// \return return std::string_view specified at index
240   Status GetItemAt(std::string_view *o, const std::vector<dsize_t> &index) const;
241 
242   template <typename T>
243   Status GetUnsignedIntAt(T *o, const std::vector<dsize_t> &index) const;
244 
245   template <typename T>
246   Status GetSignedIntAt(T *o, const std::vector<dsize_t> &index) const;
247 
248   template <typename T>
249   Status GetFloatAt(T *o, const std::vector<dsize_t> &index) const;
250 
251   /// set item at location specified by index
252   /// \tparam `T`
253   /// \param[in] index
254   /// \param[in] value of type `T`
255   template <typename T>
SetItemAt(const std::vector<dsize_t> & index,const T & value)256   Status SetItemAt(const std::vector<dsize_t> &index, const T &value) {
257     T *ptr = nullptr;
258     RETURN_IF_NOT_OK(GetItemPtr<T>(&ptr, index));
259     *ptr = value;
260     return Status::OK();
261   }
262 
263   /// set string item at location specified by index
264   /// \param[in] index
265   /// \param[in] value of type std::string
SetItemAt(const std::vector<dsize_t> & index,const std::string & value)266   Status SetItemAt(const std::vector<dsize_t> &index, const std::string &value) {
267     RETURN_UNEXPECTED_IF_NULL(data_);
268     uchar *ptr = nullptr;
269     offset_t length = 0;
270     RETURN_IF_NOT_OK(GetItemPtr(&ptr, index, &length));
271     if (value.length() != length) {
272       RETURN_STATUS_UNEXPECTED("Length of the new string does not match the item.");
273     }
274     int ret_code = memcpy_s(reinterpret_cast<char *>(ptr), length, value.c_str(), length);
275     CHECK_FAIL_RETURN_UNEXPECTED(ret_code == 0, "Failed to set data into tensor.");
276 
277     return Status::OK();
278   }
279 
280   /// fill tensor with Zeros. Does not support strings.
Zero()281   Status Zero() {
282     CHECK_FAIL_RETURN_UNEXPECTED(type_ != DataType::DE_STRING, "Cannot use Zero on tensor of strings..");
283     dsize_t size = SizeInBytes();
284     CHECK_FAIL_RETURN_UNEXPECTED(memset_sp(GetMutableBuffer(), size, 0, size) == 0,
285                                  "Failed to fill tensor with zeroes.");
286     return Status::OK();
287   }
288 
289   /// Fill all elements in the Tensor with the given value of type `T`.  Does not support strings.
290   /// \tparam T
291   /// \param value[in]
292   template <typename T>
Fill(const T & value)293   Status Fill(const T &value) {
294     CHECK_FAIL_RETURN_UNEXPECTED(type_ != DataType::DE_STRING, "Cannot use fill on tensor of strings.");
295     int64_t cellSize = type_.SizeInBytes();
296     if ((data_ != nullptr) && type_.IsCompatible<T>()) {
297       for (dsize_t i = 0; i < Size(); i++) {
298         CHECK_FAIL_RETURN_UNEXPECTED(memcpy_s((data_ + i * cellSize), cellSize, &value, cellSize) == 0, "memcpy err");
299       }
300       return Status::OK();
301     } else {
302       std::string err;
303       err += (data_ == nullptr) ? "data_ is nullptr \t" : "";
304       err += type_.IsCompatible<T>() ? "data type not compatible\t" : "";
305       return Status(StatusCode::kMDUnexpectedError, err);
306     }
307   }
308 
309   /// Getter function for shape
310   /// \return
shape()311   const TensorShape &shape() const { return shape_; }
312 
313   /// Check if tensor has data
314   /// \return bool - true if tensor is not empty
HasData()315   bool HasData() const { return data_ != nullptr; }
316 
317   /// Check if tensor is complex
318   /// \return bool - true if tensor is complex
IsComplex()319   bool IsComplex() const {
320     // check the last dim all be 2
321     return shape_[-1] == 2;
322   }
323 
324   /// Reshape the tensor. The given shape should have the same number of elements in the Tensor
325   /// \param shape
326   virtual Status Reshape(const TensorShape &shape);
327 
328   /// \return number of elements in this tensor
Size()329   dsize_t Size() const { return shape().NumOfElements(); }
330 
331   /// \return the number of bytes this tensor is needs
SizeInBytes()332   dsize_t SizeInBytes() const {
333     if (data_end_ == nullptr) return type_.SizeInBytes() * shape_.NumOfElements();
334     return data_end_ - data_;
335   }
336 
337   /// \return the rank of the tensor
Rank()338   dsize_t Rank() const { return shape().Rank(); }
339 
340   /// Get the starting memory address as a constant for the data of the tensor.  This potentially
341   /// drives an allocation if the data area.
342   /// \return const unsigned char*
GetBuffer()343   const unsigned char *GetBuffer() const { return data_; }
344 
345   /// Getter of the type
346   /// \return
type()347   DataType type() const { return type_; }
348 
349   /// Provide stream operator for displaying it
350   /// \param output stream
351   /// \param so the Tensor object to be printed
352   /// \return output stream
353   friend std::ostream &operator<<(std::ostream &out, const Tensor &so) {
354     so.Print(out);
355     return out;
356   }
357 
358   /// Invalidate this Tensor by setting the type and shape to unknown and MData to null.
359   /// Calling this method will make the Tensor and its data inaccessible, use it with caution.
360   void Invalidate();
361 
362   /// Copy input tensor into self at the location index.
363   /// Index is a vector of axes which can be incomplete:
364   /// Ex: shape <2,3>, inserting into index {0} will replace the first row. index {1,2} will replace the last cell.
365   /// \param index
366   /// \param input
367   /// \param partial_insert: boolean to determine if insertion along the full axis is enforced
368   /// \return Status code
369   Status InsertTensor(const std::vector<dsize_t> &index, const std::shared_ptr<Tensor> &input,
370                       const bool partial_insert = false);
371 
372   /// Find the address of the given index. Used in InsertTensor.
373   /// Example:
374   ///      Tensor t= [[1,2],[3,4]] , StartAddrOfIndex({0}) -> &1
375   /// \param index  incomplete index
376   /// \param output: startAddrofIndex
377   /// \param output: remaining
378   /// \return Status code
379   Status StartAddrOfIndex(std::vector<dsize_t> ind, uchar **start_addr_of_index, TensorShape *remaining);
380 
381   /// Expand the shape of the Tensor with one extra dimension.
382   /// For example, if the shape is <512,512,3>:
383   ///     *- ExpandDim(0) gives: <1,512,512,3>
384   ///     *- ExpandDim(1) gives: <512,1,512,3>
385   ///     *- ExpandDim(3) gives: <512,512,3,1>
386   /// \param axis location of the dim
387   virtual Status ExpandDim(const dsize_t &axis);
388 
389   virtual void Squeeze();
390 
391   /// Calculates the strides of the Tensor
392   /// Ex: Tensor of shape <4,2,2> and type DE_UINT8 (1 byte)
393   /// The strides will be {6,2,1}.
394   /// Ex: Tensor of shape <4,2,2> and type DE_UINT32 (4 byte)
395   /// The strides will be {24,8,4}.
396   /// \return vector of integers
397   std::vector<dsize_t> Strides() const;
398 
ToString()399   std::string ToString() {
400     std::stringstream ss;
401     this->Print(ss);
402     return ss.str();
403   }
404 
405   /// Handle negative indices.
406   /// \param[out] out modified index
407   /// \param[in] index
408   /// \param[in] length axis length used to modify index
409   /// \return dsize_t modified index
HandleNeg(dsize_t index,dsize_t length)410   static inline dsize_t HandleNeg(dsize_t index, dsize_t length) { return (index < 0) ? (index + length) : index; }
411 
412   /// Handle negative indices for a vector of indices.
413   /// \param[out] out modified vector of indices
414   /// \param[in] index_vector vector of indices
415   /// \return std::vector<dsize_t> modified vector of indices
HandleNegIndices(std::vector<dsize_t> index_vector,std::vector<dsize_t> length)416   static inline std::vector<dsize_t> HandleNegIndices(std::vector<dsize_t> index_vector, std::vector<dsize_t> length) {
417     if (length.size() < index_vector.size()) {
418       MS_LOG(ERROR) << "The size of length should be greater than the shape of index_vector";
419       return {};
420     }
421     std::vector<dsize_t> indices(index_vector.size(), 0);
422     for (size_t i = 0; i < index_vector.size(); i++) {
423       indices[i] = HandleNeg(index_vector[i], length[i]);
424     }
425     return indices;
426   }
427 
428   /// Slice tensor bases on the given indices. Copy the sliced data into out tensor.
429   /// Based on the type of tensor, SliceNumeric or SliceString will be called
430   /// \param[out] out Tensor
431   /// \param[in] slice_options vector of SliceOption objects
432   /// \return Status error code
433   Status Slice(TensorPtr *out, const std::vector<mindspore::dataset::SliceOption> slice_options);
434 
435   /// Get slice_option according to shape and index.
436   /// \param[in] slice_option input SliceOption object
437   /// \param[in] slice_index index of SliceOption object
438   /// \param[out] output slice_option with shape info
439   /// \return Status error code
440   Status GetSliceOption(const SliceOption &slice_option, const int32_t &slice_index, SliceOption *slice_option_ptr);
441 
442 #ifdef ENABLE_PYTHON
443   /// Constructs numpy array from input tensor
444   /// \param[in] data this data is the location of python data
445   /// \return Status code
446   Status GetDataAsNumpy(py::array *data);
447 
448   Status GetDataAsNumpyStrings(py::array *data);
449 
450   static Status GetBufferInfo(Tensor *t, py::buffer_info *out);
451 #endif
452 
SetYuvShape(const uint32_t & width,const uint32_t & widthStride,const uint32_t & height,const uint32_t & heightStride)453   Status SetYuvShape(const uint32_t &width, const uint32_t &widthStride, const uint32_t &height,
454                      const uint32_t &heightStride) {
455     std::vector<uint32_t> tmp{width, widthStride, height, heightStride};
456     yuv_shape_ = tmp;
457     return Status::OK();
458   }
459 
GetYuvShape()460   std::vector<uint32_t> GetYuvShape() { return yuv_shape_; }
461 
462   /// TensorIterator is a linear iterator that can be used to iterate over the elements of the Tensor
463   /// The order  elements  is as the memory layout (i.e., row-major) [[1,2,3],[4,5,6] --> 1,2,3,4,5,6
464   /// \tparam T type of values in the Tensor Iterator
465   template <typename T, bool = true>
466   class TensorIterator {
467    public:
468     using iterator_category = std::random_access_iterator_tag;
469     using value_type = T;
470     using difference_type = ptrdiff_t;
471     using pointer = T *;
472     using reference = T &;
473 
474     explicit TensorIterator(uchar *ptr = nullptr) { ptr_ = reinterpret_cast<T *>(ptr); }
475 
TensorIterator(const TensorIterator<T> & raw_iterator)476     TensorIterator(const TensorIterator<T> &raw_iterator) { ptr_ = raw_iterator.ptr_; }
477 
478     ~TensorIterator() = default;
479 
480     TensorIterator<T> &operator=(const TensorIterator<T> &rhs) {
481       ptr_ = rhs.ptr_;
482       return *this;
483     }
484 
485     TensorIterator<T> &operator=(T *rhs) {
486       ptr_ = rhs;
487       return *this;
488     }
489 
490     bool operator==(const TensorIterator<T> &rhs) { return ptr_ == rhs.ptr_; }
491 
492     bool operator!=(const TensorIterator<T> &rhs) { return !(*this == rhs); }
493 
494     operator bool() const { return ptr_ != nullptr; }
495 
496     T &operator*() { return *ptr_; }
497 
498     const T &operator*() const { return *ptr_; }
499 
500     T *operator->() { return ptr_; }
501 
502     TensorIterator<T> &operator+=(const ptrdiff_t &inc) {
503       ptr_ += inc;
504       return *this;
505     }
506 
507     TensorIterator<T> &operator-=(const ptrdiff_t &inc) {
508       ptr_ -= inc;
509       return *this;
510     }
511 
512     TensorIterator<T> &operator++() {
513       ++ptr_;
514       return *this;
515     }
516 
517     TensorIterator<T> &operator--() {
518       --ptr_;
519       return *this;
520     }
521 
522     TensorIterator<T> operator++(int) {
523       auto temp(*this);
524       ++ptr_;
525       return temp;
526     }
527 
528     TensorIterator<T> operator--(int) {
529       auto temp(*this);
530       --ptr_;
531       return temp;
532     }
533 
534     TensorIterator<T> operator+(const ptrdiff_t &inc) {
535       auto oldPtr = ptr_;
536       ptr_ += inc;
537       auto temp(*this);
538       ptr_ = oldPtr;
539       return temp;
540     }
541 
542     TensorIterator<T> operator-(const ptrdiff_t &inc) {
543       auto oldPtr = ptr_;
544       ptr_ -= inc;
545       auto temp(*this);
546       ptr_ = oldPtr;
547       return temp;
548     }
549 
550    protected:
551     T *ptr_;
552   };
553 
554   // Specialization of TensorIterator for strings. It returns std::string_view for every item.
555   // \tparam DUMMY, used to mbe able to specialize the inner class
556   template <bool DUMMY>
557   class TensorIterator<std::string_view, DUMMY> {
558    public:
559     using iterator_category = std::random_access_iterator_tag;
560     using value_type = std::string_view;
561     using difference_type = ptrdiff_t;
562     using pointer = std::string_view *;
563     using reference = std::string_view &;
564 
565     explicit TensorIterator(uchar *data = nullptr, dsize_t index = 0) {
566       data_ = reinterpret_cast<const char *>(data);
567       index_ = index;
568     }
569 
TensorIterator(const TensorIterator<std::string_view,DUMMY> & raw_iterator)570     TensorIterator(const TensorIterator<std::string_view, DUMMY> &raw_iterator) {
571       data_ = raw_iterator.data_;
572       index_ = raw_iterator.index_;
573     }
574 
575     ~TensorIterator() = default;
576 
577     bool operator==(const TensorIterator<std::string_view> &rhs) { return data_ == rhs.data_ && index_ == rhs.index_; }
578 
579     bool operator!=(const TensorIterator<std::string_view> &rhs) { return !(*this == rhs); }
580 
581     operator bool() const { return data_ != nullptr; }
582 
583     std::string_view operator*() const {
584       auto offset_ = reinterpret_cast<const offset_t *>(data_);
585       offset_t start = offset_[index_];
586       return std::string_view{data_ + start};
587     }
588 
589     TensorIterator<std::string_view> &operator+=(const dsize_t &inc) {
590       index_ += inc;
591       return *this;
592     }
593 
594     TensorIterator<std::string_view> &operator-=(const dsize_t &inc) {
595       index_ -= inc;
596       return *this;
597     }
598 
599     TensorIterator<std::string_view> &operator++() {
600       ++index_;
601       return *this;
602     }
603 
604     TensorIterator<std::string_view> &operator--() {
605       --index_;
606       return *this;
607     }
608 
609     TensorIterator<std::string_view> operator++(int) {
610       auto temp(*this);
611       ++index_;
612       return temp;
613     }
614 
615     TensorIterator<std::string_view> operator--(int) {
616       auto temp(*this);
617       --index_;
618       return temp;
619     }
620 
621     TensorIterator<std::string_view> operator+(const dsize_t &inc) {
622       auto oldPtr = index_;
623       index_ += inc;
624       auto temp(*this);
625       index_ = oldPtr;
626       return temp;
627     }
628 
629     TensorIterator<std::string_view> operator-(const dsize_t &inc) {
630       auto oldPtr = index_;
631       index_ -= inc;
632       auto temp(*this);
633       index_ = oldPtr;
634       return temp;
635     }
636 
637    protected:
638     dsize_t index_;
639     const char *data_;
640   };
641 
642   /// Return a TensorIterator that points to the start of the Tensor.
643   /// It's the user responsibility to use the correct type that matches the Tensor type
644   /// \tparam T The type of values in the Tensor
645   /// \return TensorIterator
646   template <typename T>
begin()647   TensorIterator<T> begin() {
648     return TensorIterator<T>(data_);
649   }
650 
651   /// Return a linear iterator that points to the place after the last element of the Tensor.
652   /// \tparam T The type of values in the Tensor
653   /// \return TensorIterator
654   template <typename T>
end()655   TensorIterator<T> end() {
656     return TensorIterator<T>(data_end_);
657   }
658 
659   /// Copies the last dimension at `index` from Tensor `src` to this Tensor.
660   /// \param[in] src Tensor
661   /// \param[in] index vector to the start of the dimension. The last dim should be 0
662   /// \return Status
663   Status CopyLastDimAt(const std::shared_ptr<Tensor> &src, const std::vector<dsize_t> &index);
664 
665  protected:
666   /// Allocate memory for the tensor using the data_allocator
667   /// \param[in] length number of bytes to be allocated
668   /// \return Error Status
669   Status AllocateBuffer(const dsize_t &length);
670 
671   /// Get the starting memory address for the data of the tensor.  This potentially
672   /// drives an allocation if the data is null.
673   /// \return unsigned char*
GetMutableBuffer()674   unsigned char *GetMutableBuffer() { return data_; }
675 
676   /// A function that prints Tensor recursively, first called by print
677   /// \param[in] out
678   /// \param[in] cur_dim
679   /// \param[in] cur_index
680   void PrintRecursive(std::ostream &out, int32_t cur_dim, const std::vector<dsize_t> &cur_index) const;
681 
682   /// A function that prints info about the tensor
683   /// \param[out] out output stream
684   void Print(std::ostream &out) const;
685 
686   /// A function that prints info about the tensor
687   /// \param[out] out output stream
688   void PrintData(std::ostream &out) const;
689 
690   /// A function that print the value as specified by its index
691   /// \param[in] index vector representing the index
692   /// \param[out] out
693   void PrintItemAt(const std::vector<dsize_t> &index, std::ostream &out) const;
694 
695   /// Get pointer to item located at `index`, caller needs to provide the type.
696   /// \tparam T
697   /// \param[in] index vector<dsize_t>
698   /// \return return a pointer to the item specified at index of type `T`
699   template <typename T>
700   Status GetItemPtr(T **, const std::vector<dsize_t> &index) const;
701 
702   /// Get pointer to string located at `index` and the length of string
703   /// \param[in] index vector<dsize_t>
704   /// \return return a pointer to the string specified at index and the length of the string
705   Status GetItemPtr(uchar **, const std::vector<dsize_t> &index, offset_t *length = nullptr) const;
706 
707   /// Given a flat index of an item string, return the start and length of the item
708   /// \param[in] index flat index of the item
709   /// \param[out] start address of the ths string
710   /// \param[out] length of the string
711   Status GetStringAt(dsize_t index, uchar **string_start, offset_t *length) const;
712 
713   /// Skip the offsets and returns the start of the buffer where the real strings is stored. Caller needs to check if
714   /// the tensor's type is a string, otherwise undefined address would be returned. \return address of the first string
715   /// of the tensor.
GetStringsBuffer()716   uchar *GetStringsBuffer() const { return data_ + kOffsetSize * shape_.NumOfElements() + kOffsetSize; }
717 
718   /// all access to shape_ should be via shape
719   TensorShape shape_;
720   /// data type of tensor
721   DataType type_;
722   /// pointer to the start of the physical data
723   unsigned char *data_;
724   /// An allocator for data_
725   CharAllocPtr data_allocator_;
726   /// pointer to the end of the physical data
727   unsigned char *data_end_ = nullptr;
728 
729   /// shape for interpretation of YUV image
730   std::vector<uint32_t> yuv_shape_;
731 
732  private:
733   friend class DETensor;
734 
735   /// Slice numeric tensors.
736   Status SliceNumeric(TensorPtr *out, const std::vector<std::vector<dsize_t>> &indices, const TensorShape &shape);
737 
738   /// Slice string tensors
739   Status SliceString(TensorPtr *out, const std::vector<std::vector<dsize_t>> &indices, const TensorShape &shape);
740 
741   /// Copy raw data of a array based on shape and strides to the destination pointer
742   /// \param dst [out] Pointer to the destination array where the content is to be copied
743   /// \param[in] src Pointer to the source of strided array to be copied
744   /// \param[in] shape shape of the source array
745   /// \param[in] strides strides of the source array
746   /// \param[in] type_size number of bytes needed to store one array element's type
747   /// \return Status Code
748   static Status CopyStridedArray(unsigned char *dst, unsigned char *src, std::vector<dsize_t> shape,
749                                  std::vector<dsize_t> strides, uint8_t type_size);
750 
751   /// const of the size of the offset variable
752   static constexpr uint8_t kOffsetSize = sizeof(offset_t);
753 
754 #ifdef ENABLE_PYTHON
755   /// Helper function to create a tensor from Numpy array of strings
756   /// \param[in] arr Numpy array
757   /// \param[out] out Created Tensor
758   /// \return Status
759   static Status CreateFromNpString(py::array arr, TensorPtr *out);
760 #endif
761 };
/// Specialization of end() for string tensors. The string iterator is index-based
/// (it walks the offset array at the start of data_), so the past-the-end iterator
/// is (data_, number-of-elements) rather than a raw past-the-end pointer.
template <>
inline Tensor::TensorIterator<std::string_view> Tensor::end<std::string_view>() {
  return TensorIterator<std::string_view>(data_, shape_.NumOfElements());
}
766 
767 /// Create a Tensor from a given list of strings.
768 /// @note: The memory layout of a Tensor of strings consists of the Offset_array followed by the strings.
769 /// The offset array will store one extra value to find the length of the last string.
770 /// OFFSET_1, OFFSET_2, ..., OFFSET_n+1, STRING_1, STRING_2, ..., STRING_n
771 /// The value of each offset is the start index of the corresponding string
772 /// Offsets is of type offset_t
773 /// strings will ne null-terminated
774 /// example: Tensor(['abc', 'de'], shape={2}, type=DE_STRING)
775 /// |----------------------------------------------------------------|
776 /// |             OFFSET ARRAY           |            STRINGS        |
777 /// | bytes 0-3 | bytes 3-6 | bytes 7-10 | bytes 11-14 | bytes 15-17 |
778 /// |     11    |    15     |     18     |     abc\0   |      de\0   |
779 /// |----------------------------------------------------------------|
780 /// \param[in] items elements of the tensor
781 /// \param[in] shape shape of the output tensor
782 /// \param[out] out output argument to hold the created Tensor
783 /// \return Status Code
784 template <>
785 inline Status Tensor::CreateFromVector<std::string>(const std::vector<std::string> &items, const TensorShape &shape,
786                                                     TensorPtr *out) {
787   RETURN_UNEXPECTED_IF_NULL(out);
788   CHECK_FAIL_RETURN_UNEXPECTED(
789     static_cast<dsize_t>(items.size()) == shape.NumOfElements(),
790     "Number of elements in the vector does not match the number of elements of the shape required");
791   const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
792   *out = std::allocate_shared<Tensor>(*alloc, TensorShape({static_cast<dsize_t>(items.size())}),
793                                       DataType(DataType::DE_STRING));
794   CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory failed.");
795   if (items.size() == 0) {
796     if (shape.known()) {
797       return (*out)->Reshape(shape);
798     }
799   }
800   auto length_sum = [](dsize_t sum, const std::string &s) { return s.length() + sum; };
801   dsize_t total_length = std::accumulate(items.begin(), items.end(), 0, length_sum);
802 
803   // total bytes needed = offset array + strings
804   // offset array needs to store one offset var per element + 1 extra to get the length of the last string.
805   // strings will be null-terminated --> need 1 extra byte per element
806   dsize_t num_bytes = (kOffsetSize + 1) * (*out)->shape_.NumOfElements() + kOffsetSize + total_length;
807 
808   RETURN_IF_NOT_OK((*out)->AllocateBuffer(num_bytes));
809   auto offset_arr = reinterpret_cast<offset_t *>((*out)->data_);
810   uchar *buf = (*out)->GetStringsBuffer();
811 
812   offset_t offset = buf - (*out)->data_;  // the first string will start here
813   uint32_t i = 0;
814   for (const auto &str : items) {
815     //  insert the start index of the string.
816     offset_arr[i++] = offset;
817     // total bytes are reduced by kOffsetSize
818     num_bytes -= kOffsetSize;
819     // insert actual string
820     int ret_code = memcpy_s((*out)->data_ + offset, num_bytes, common::SafeCStr(str), str.length() + 1);
821     if (ret_code != 0) MS_LOG(ERROR) << "Cannot copy string into Tensor";
822     //  next string will be stored right after the current one.
823     offset = offset + str.length() + 1;
824     // total bytes are reduced by the length of the string
825     num_bytes -= str.length() + 1;
826   }
827   // store one more offset value so we can get the length of the last string
828   offset_arr[i] = offset;
829 
830   (*out)->data_end_ = (*out)->data_ + offset_arr[i];
831 
832   MS_ASSERT(num_bytes == 0);
833   if (shape.known()) {
834     RETURN_IF_NOT_OK((*out)->Reshape(shape));
835   }
836   return Status::OK();
837 }
838 /// Create a string scalar Tensor from the given value.
839 /// \param[in] item value
840 /// \param[out] out Created tensor
841 /// \return Status code
842 template <>
843 inline Status Tensor::CreateScalar<std::string>(const std::string &item, TensorPtr *out) {
844   RETURN_UNEXPECTED_IF_NULL(out);
845   return CreateFromVector<std::string>({item}, TensorShape::CreateScalar(), out);
846 }
847 }  // namespace dataset
848 }  // namespace mindspore
849 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_H_
850