/**
 * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_DATA_DATA_UTILS_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_DATA_DATA_UTILS_H_

#include <memory>
#include <string>
#include <vector>
#include <unordered_map>
#include "minddata/dataset/include/dataset/constants.h"
#ifndef ENABLE_ANDROID
#include "minddata/dataset/core/cv_tensor.h"
#endif
#include "minddata/dataset/core/data_type.h"
#include "minddata/dataset/core/tensor.h"
#include "minddata/dataset/core/tensor_row.h"

namespace mindspore {
namespace dataset {
// Returns the one-hot encoding of the input tensor.
// Example: if input=2 and numClasses=3, the output is [0 0 1].
// @param input: Tensor has type DE_UINT64, the non-one hot values are stored
//               along the first dimensions or rows.
//               If the rank of input is not 1 or the type is not DE_UINT64,
//               then it will fail.
// @param output: Tensor. The shape of the output tensor is <input_shape, numClasses>
//                and the type is same as input.
// @param num_classes: Number of classes (the length of each one-hot vector).
// NOTE(review): the DE_UINT64-only restriction stated above may be stale given the OneHotEncodingSigned
// helper declared below - confirm against the implementation.
Status OneHotEncoding(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, dsize_t num_classes);

// Variant of OneHotEncoding that takes an additional `index` argument.
// NOTE(review): presumably encodes the unsigned-typed input element at position `index` into `output`;
// the role of `index` is not visible from this header - confirm against the implementation.
Status OneHotEncodingUnsigned(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output,
                              dsize_t num_classes, int64_t index);

// Variant of OneHotEncoding that takes an additional `index` argument.
// NOTE(review): presumably the signed-type counterpart of OneHotEncodingUnsigned - confirm against the
// implementation.
Status OneHotEncodingSigned(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, dsize_t num_classes,
                            int64_t index);

// Returns a tensor with the shape of input, filled with the passed fill_value.
// @param input Tensor
// @param output Tensor. The shape and type of the output tensor is same as input
// @param fill_value Tensor. A scalar tensor used to fill the output tensor
// Note: `input` and `fill_value` are taken by value, so each call copies the shared_ptr handles.
Status Fill(const std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output, std::shared_ptr<Tensor> fill_value);

// Returns a type changed input tensor (this comment describes TypeCast, declared at the end of this group).
// Example: if input tensor is float64, the output will be of the specified data type. See DataTypes.cpp
// @param input Tensor
// @param output Tensor. The shape of the output tensor is same as input with the type changed.
// @param data_type: type of data to cast data to
// @note: this operation will do a memcpy and if the value is truncated then precision will be lost

// Cast helper templated on a single type T.
// NOTE(review): presumably T is the source element type and the target type is taken from *output - confirm.
template <typename T>
void CastFrom(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output);

// Cast helper templated on both the source element type (FROM) and the destination element type (TO).
template <typename FROM, typename TO>
void Cast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output);

// Converts input into output as float16, going by the function name.
// NOTE(review): accepted input types are not visible from this header - confirm against the implementation.
Status ToFloat16(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output);

// Casts input to data_type and stores the result in output; see the comment block above for details.
Status TypeCast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const DataType &data_type);

// Pad input tensor according to pad_shape, need to have same rank.
// Based on the type of the input tensor, PadEndNumeric/String will be called.
// @param std::shared_ptr<Tensor> src - tensor to pad from
// @param std::shared_ptr<Tensor> *dst - return tensor padded
// @param std::vector<dsize_t> pad_shape - shape to pad to
// @param std::shared_ptr<Tensor> pad_val - value to pad with in Tensor format
// @return Status The status code returned
Status PadEnd(const std::shared_ptr<Tensor> &src, std::shared_ptr<Tensor> *dst, const std::vector<dsize_t> &pad_shape,
              const std::shared_ptr<Tensor> &pad_val);

// Pad input numeric tensor according to pad_shape, need to have same rank.
// @param std::shared_ptr<Tensor> src - tensor to pad from
// @param std::shared_ptr<Tensor> *dst - return tensor padded
// @param std::vector<dsize_t> pad_shape - shape to pad to
// @param float pad_val - value to pad with
// @return Status The status code returned
Status PadEndNumeric(const std::shared_ptr<Tensor> &src, std::shared_ptr<Tensor> *dst,
                     const std::vector<dsize_t> &pad_shape, float pad_val);

// Recursive helper function for padding numeric tensors. This function could be very expensive if called on a
// multi-dimensional tensor; it is only meant to be called by PadEndNumeric.
// @param std::shared_ptr<Tensor> src - Tensor to pad from
// @param std::shared_ptr<Tensor> dst - Tensor to pad to (passed as a shared_ptr by value, not as an out-pointer)
// @param std::vector<dsize_t> cur_ind - recursion helper
// @param size_t cur_dim - recursion helper, defaults to 0
// @return Status The status code returned
// NOTE(review): this function is not a template and takes no pad value; presumably dst is already filled with
// the pad value by PadEndNumeric before this helper runs - confirm against the implementation.
Status PadEndNumericHelper(const std::shared_ptr<Tensor> &src, std::shared_ptr<Tensor> dst,
                           std::vector<dsize_t> cur_ind, size_t cur_dim = 0);

// Pad input string tensor according to pad_shape, need to have same rank.
// @param std::shared_ptr<Tensor> src - tensor to pad from
// @param std::shared_ptr<Tensor> *dst - return tensor padded
// @param std::vector<dsize_t> pad_shape - shape to pad to
// @param std::string pad_val - value to pad with
// @return Status The status code returned
Status PadEndString(const std::shared_ptr<Tensor> &src, std::shared_ptr<Tensor> *dst,
                    const std::vector<dsize_t> &pad_shape, const std::string &pad_val);

// Recursive helper function for padding string tensors. This function could be very expensive if called on a
// multi-dimensional tensor; it is only meant to be called by PadEndString.
// @param std::shared_ptr<Tensor> src - Tensor to pad from
// @param std::vector<std::string> *dst - vector of strings to pad into, return value
// @param TensorShape dst_shape - presumably the shape of the padded result (TODO confirm against PadEndString)
// @param std::vector<dsize_t> cur_ind - recursion helper
// @param size_t cur_dim - recursion helper
// @param std::string pad_value - value to pad tensor with
// @return Status The status code returned
Status PadEndStringHelper(const std::shared_ptr<Tensor> &src, std::vector<std::string> *dst,
                          const TensorShape &dst_shape, std::vector<dsize_t> cur_ind, size_t cur_dim,
                          const std::string &pad_value);

/// Helper method that masks the input tensor
/// @tparam T type of the tensor
/// @param input[in] input tensor
/// @param output[out] output tensor (passed as a const shared_ptr reference, so the pointee is written in place)
/// @param value_tensor[in] scalar tensor value to compare with
/// @param op[in] RelationalOp enum
/// @return Status ok/error
template <typename T>
Status MaskHelper(const std::shared_ptr<Tensor> &input, const std::shared_ptr<Tensor> &output,
                  const std::shared_ptr<Tensor> &value_tensor, RelationalOp op);

/// Mask the input tensor
/// @param input[in] input tensor
/// @param output[out] output tensor
/// @param value[in] scalar tensor value to compare with
/// @param op[in] RelationalOp enum
/// @return Status ok/error
Status Mask(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const std::shared_ptr<Tensor> &value,
            RelationalOp op);

/// Concatenate the tensors of a row along an axis, with optional-looking prepend/append tensors.
/// \param input[in] input tensor row
/// \param output[out] output tensor row
/// \param axis[in] axis to concatenate along
/// \param prepend[in] tensor passed by value; presumably placed before the input data (TODO confirm)
/// \param append[in] tensor passed by value; presumably placed after the input data (TODO confirm)
/// \return Status ok/error
/// NOTE(review): whether prepend/append may be null is not visible from this header - confirm.
Status Concatenate(const TensorRow &input, TensorRow *output, int8_t axis, std::shared_ptr<Tensor> prepend,
                   std::shared_ptr<Tensor> append);

// helper for concat, always append to the input, and pass that to the output
Status ConcatenateHelper(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int8_t axis,
                         std::shared_ptr<Tensor> append);

/// Convert an n-dimensional Tensor to a vector of (n-1)-dimensional CVTensors
/// \param input[in] input tensor
/// \param output[out] output vector of CVTensors
/// \return Status ok/error
/// NOTE(review): cv_tensor.h is only included in this header when ENABLE_ANDROID is not defined, while this
/// declaration is unconditional - confirm CVTensor is declared by another included header in Android builds.
Status BatchTensorToCVTensorVector(const std::shared_ptr<Tensor> &input,
                                   std::vector<std::shared_ptr<CVTensor>> *output);

/// Convert an n-dimensional Tensor to a vector of (n-1)-dimensional Tensors
/// \param input[in] input tensor
/// \param output[out] output vector of tensors
/// \return Status ok/error
Status BatchTensorToTensorVector(const std::shared_ptr<Tensor> &input, std::vector<std::shared_ptr<Tensor>> *output);

/// Convert a vector of (n-1)-dimensional Tensors to an n-dimensional Tensor
/// \param input[in] input vector of tensors
/// \param output[out] output tensor
/// \return Status ok/error
Status TensorVectorToBatchTensor(const std::vector<std::shared_ptr<Tensor>> &input, std::shared_ptr<Tensor> *output);

/// Helper method that uniques the input tensor
/// \tparam T type of the tensor
/// \param input[in] input 1d tensor
/// \param output[out] output tensor
/// \param output_idx[out] output tensor of item index
/// \param output_cnt[out] output tensor of item count
/// \return Status ok/error
template <typename T>
Status UniqueHelper(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output,
                    std::shared_ptr<Tensor> *output_idx, std::shared_ptr<Tensor> *output_cnt);

/// Unique the input tensor
/// \param input[in] input 1d tensor
/// \param output[out] output tensor
/// \param output_idx[out] output tensor of item index
/// \param output_cnt[out] output tensor of item count
/// \return Status ok/error
Status Unique(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output,
              std::shared_ptr<Tensor> *output_idx, std::shared_ptr<Tensor> *output_cnt);
}  // namespace dataset
}  // namespace mindspore

#endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_DATA_DATA_UTILS_H_