• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2019 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_DATA_DATA_UTILS_H_
17 #define MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_DATA_DATA_UTILS_H_
18 
19 #include <memory>
20 #include <string>
21 #include <vector>
22 #include <unordered_map>
23 #include "minddata/dataset/include/dataset/constants.h"
24 #ifndef ENABLE_ANDROID
25 #include "minddata/dataset/core/cv_tensor.h"
26 #endif
27 #include "minddata/dataset/core/data_type.h"
28 #include "minddata/dataset/core/tensor.h"
29 #include "minddata/dataset/core/tensor_row.h"
30 
31 namespace mindspore {
32 namespace dataset {
33 // Returns Onehot encoding of the input tensor.
34 //          Example: if input=2 and numClasses=3, the output is [0 0 1].
35 // @param input: Tensor has type DE_UINT64, the non-one hot values are stored
36 //               along the first dimension (rows).
37 //               If the rank of input is not 1 or the type is not DE_UINT64,
38 //               then it will fail.
39 // @param output: Tensor. The shape of the output tensor is <input_shape, numClasses>
40 //                and the type is same as input.
41 // @param num_classes: Number of classes to one-hot encode.
42 Status OneHotEncoding(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, dsize_t num_classes);
43 
44 Status OneHotEncodingUnsigned(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output,
45                               dsize_t num_classes, int64_t index);
46 
47 Status OneHotEncodingSigned(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, dsize_t num_classes,
48                             int64_t index);
49 
50 // Returns a tensor with the same shape as the input, filled with the passed fill_value
51 // @param input  Tensor
52 // @param output Tensor. The shape and type of the output tensor is same as input
53 // @param fill_value Tensor. A scalar tensor used to fill the output tensor
54 
55 Status Fill(const std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output, std::shared_ptr<Tensor> fill_value);
56 
57 // Returns a type changed input tensor.
58 //          Example: if input tensor is float64, the output will be the specified dataType. See DataTypes.cpp
59 // @param input  Tensor
60 // @param output Tensor. The shape of the output tensor is same as input with the type changed.
61 // @param data_type: type of data to cast data to
62 // @note: this operation will do a memcpy and if the value is truncated then precision will be lost
63 
64 template <typename T>
65 void CastFrom(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output);
66 
67 template <typename FROM, typename TO>
68 void Cast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output);
69 
70 Status ToFloat16(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output);
71 
72 Status TypeCast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const DataType &data_type);
73 
74 // Pad input tensor according to pad_shape; src and pad_shape need to have the same rank.
75 // Based on the type of the input tensor, PadEndNumeric/String will be called.
76 // @param std::shared_ptr<Tensor> src - tensor to pad from
77 // @param std::shared_ptr<Tensor> *dst - return tensor padded
78 // @param std::vector<dsize_t> pad_shape - shape to pad to
79 // @param std::shared_ptr<Tensor> pad_val - value to pad with in Tensor format,
80 // @return Status The status code returned
81 Status PadEnd(const std::shared_ptr<Tensor> &src, std::shared_ptr<Tensor> *dst, const std::vector<dsize_t> &pad_shape,
82               const std::shared_ptr<Tensor> &pad_val);
83 
84 // Pad input numeric tensor according to pad_shape; src and pad_shape need to have the same rank.
85 // @param std::shared_ptr<Tensor> src - tensor to pad from
86 // @param std::shared_ptr<Tensor> *dst - return tensor padded
87 // @param std::vector<dsize_t> pad_shape - shape to pad to
88 // @param float pad_val - value to pad with
89 // @return Status The status code returned
90 Status PadEndNumeric(const std::shared_ptr<Tensor> &src, std::shared_ptr<Tensor> *dst,
91                      const std::vector<dsize_t> &pad_shape, float pad_val);
92 
93 // Recursive helper function for padding numeric tensors. This function could be very expensive if called on a
94 // multi-dimensional tensor; it is only meant to be called by PadEndNumeric.
95 // @tparam T - type of tensor and fill value
96 // @param std::shared_ptr<Tensor> src - Tensor to pad from
97 // @param std::shared_ptr<Tensor>* dst - Tensor to pad to, return value
98 // @param std::vector<dsize_t> cur_ind - recursion helper
99 // @param T pad_val - value to pad tensor with
100 // @param size_t cur_dim - recursion helper
101 // @return Status The status code returned
102 Status PadEndNumericHelper(const std::shared_ptr<Tensor> &src, std::shared_ptr<Tensor> dst,
103                            std::vector<dsize_t> cur_ind, size_t cur_dim = 0);
104 
105 // Pad input string tensor according to pad_shape; src and pad_shape need to have the same rank.
106 // @param std::shared_ptr<Tensor> src - tensor to pad from
107 // @param std::shared_ptr<Tensor> *dst - return tensor padded
108 // @param std::vector<dsize_t> pad_shape - shape to pad to
109 // @param std::string pad_val - value to pad with
110 // @return Status The status code returned
111 Status PadEndString(const std::shared_ptr<Tensor> &src, std::shared_ptr<Tensor> *dst,
112                     const std::vector<dsize_t> &pad_shape, const std::string &pad_val);
113 
114 // Recursive helper function for padding string tensors. This function could be very expensive if called on a
115 // multi-dimensional tensor; it is only meant to be called by PadEndString.
116 // @tparam T - type of tensor and fill value
117 // @param std::shared_ptr<Tensor> src - Tensor to pad from
118 // @param std::shared_ptr<Tensor>* dst - Tensor to pad to, return value
119 // @param std::vector<dsize_t> cur_ind - recursion helper
120 // @param std::string pad_val - value to pad tensor with
121 // @param size_t cur_dim - recursion helper
122 // @return Status The status code returned
123 Status PadEndStringHelper(const std::shared_ptr<Tensor> &src, std::vector<std::string> *dst,
124                           const TensorShape &dst_shape, std::vector<dsize_t> cur_ind, size_t cur_dim,
125                           const std::string &pad_value);
126 
127 /// Helper method that masks the input tensor
128 /// @tparam T type of the tensor
129 /// @param input[in] input tensor
130 /// @param output[out] output tensor
131 /// @param value_tensor[in] scalar tensor value to be compared with
132 /// @param op[in] RelationalOp enum
133 /// @return Status ok/error
134 template <typename T>
135 Status MaskHelper(const std::shared_ptr<Tensor> &input, const std::shared_ptr<Tensor> &output,
136                   const std::shared_ptr<Tensor> &value_tensor, RelationalOp op);
137 
138 /// Mask the input tensor
139 /// @param input[in] input tensor
140 /// @param output[out] output tensor
141 /// @param value[in] scalar tensor value to be compared with
142 /// @param op[in] RelationalOp enum
143 /// @return Status ok/error
144 Status Mask(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const std::shared_ptr<Tensor> &value,
145             RelationalOp op);
146 
147 Status Concatenate(const TensorRow &input, TensorRow *output, int8_t axis, std::shared_ptr<Tensor> prepend,
148                    std::shared_ptr<Tensor> append);
149 
150 // helper for concat, always append to the input, and pass that to the output
151 Status ConcatenateHelper(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int8_t axis,
152                          std::shared_ptr<Tensor> append);
153 
154 /// Convert an n-dimensional Tensor to a vector of (n-1)-dimensional CVTensors
155 /// \param input[in] input tensor
156 /// \param output[out] output vector of CVTensors
157 /// \return Status ok/error
158 Status BatchTensorToCVTensorVector(const std::shared_ptr<Tensor> &input,
159                                    std::vector<std::shared_ptr<CVTensor>> *output);
160 
161 /// Convert an n-dimensional Tensor to a vector of (n-1)-dimensional Tensors
162 /// \param input[in] input tensor
163 /// \param output[out] output vector of tensors
164 /// \return Status ok/error
165 Status BatchTensorToTensorVector(const std::shared_ptr<Tensor> &input, std::vector<std::shared_ptr<Tensor>> *output);
166 
167 /// Convert a vector of (n-1)-dimensional Tensors to an n-dimensional Tensor
168 /// \param input[in] input vector of tensors
169 /// \param output[out] output tensor
170 /// \return Status ok/error
171 Status TensorVectorToBatchTensor(const std::vector<std::shared_ptr<Tensor>> &input, std::shared_ptr<Tensor> *output);
172 
173 /// Helper method that uniques the input tensor
174 /// @tparam T type of the tensor
175 /// \param input[in] input 1d tensor
176 /// \param output[out] output tensor
177 /// \param output_idx[out] output tensor of item index
178 /// \param output_cnt[out] output tensor of item count
179 /// \return Status ok/error
180 template <typename T>
181 Status UniqueHelper(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output,
182                     std::shared_ptr<Tensor> *output_idx, std::shared_ptr<Tensor> *output_cnt);
183 
184 /// Unique the input tensor
185 /// @tparam T type of the tensor
186 /// \param input[in] input 1d tensor
187 /// \param output[out] output tensor
188 /// \param output_idx[out] output tensor of item index
189 /// \param output_cnt[out] output tensor of item count
190 /// \return Status ok/error
191 Status Unique(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output,
192               std::shared_ptr<Tensor> *output_idx, std::shared_ptr<Tensor> *output_cnt);
193 }  // namespace dataset
194 }  // namespace mindspore
195 
196 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_DATA_DATA_UTILS_H_
197