/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_TEXT_DATA_UTILS_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_TEXT_DATA_UTILS_H_

#include <memory>
#include <string>
#include <vector>
#include "minddata/dataset/util/status.h"
#include "minddata/dataset/include/dataset/constants.h"
#include "minddata/dataset/core/data_type.h"
#include "minddata/dataset/core/tensor.h"
#include "minddata/dataset/core/cv_tensor.h"
#include "minddata/dataset/core/tensor_shape.h"
#include "minddata/dataset/core/tensor_row.h"

namespace mindspore {
namespace dataset {
/// \brief Helper method that performs a sliding window over the input tensor.
/// \param[in] input - Input tensor.
/// \param[in] out_shape - Shape of the output tensor.
/// \param[in] width - The width of the window.
/// \param[in] axis - The axis along which the sliding window is computed.
/// \param[out] output - Output tensor.
/// \return Status return code.
Status SlidingWindowHelper(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, TensorShape out_shape,
                           uint32_t width, int32_t axis);

/// \brief Helper method that appends offsets tensors to the output TensorRow.
/// \param[in] offsets_start - Offsets start index vector.
/// \param[in] offsets_limit - Offsets length vector.
/// \param[out] output - Output TensorRow.
/// \return Status return code.
Status AppendOffsetsHelper(const std::vector<uint32_t> &offsets_start, const std::vector<uint32_t> &offsets_limit,
                           TensorRow *output);

/// \brief Helper method that adds a token to the input tensor.
/// \param[in] input Input tensor.
/// \param[in] token The token to be added.
/// \param[in] begin Whether to insert the token at the start or the end of the sequence.
/// \param[out] output Output tensor.
/// \return Status return code.
Status AddToken(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const std::string &token,
                bool begin);

/// \brief Truncate the input sequence so that it does not exceed the maximum length.
/// \param[in] input Input tensor.
/// \param[in] max_seq_len Maximum allowable length.
/// \param[out] output Output tensor.
/// \return Status return code.
Status Truncate(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int max_seq_len);
}  // namespace dataset
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_TEXT_DATA_UTILS_H_
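// Illustrative usage sketch (an assumption, not part of the original header): it shows how the
// AddToken and Truncate helpers declared above might be chained by a text op, assuming a 1-D
// string tensor built with Tensor::CreateFromVector. Variable names, the "[CLS]" token, and the
// max_seq_len value of 512 are hypothetical.
//
//   std::shared_ptr<Tensor> tokens;
//   RETURN_IF_NOT_OK(Tensor::CreateFromVector(std::vector<std::string>{"hello", "world"}, &tokens));
//   std::shared_ptr<Tensor> with_cls;
//   RETURN_IF_NOT_OK(AddToken(tokens, &with_cls, "[CLS]", true));  // prepend the token
//   std::shared_ptr<Tensor> truncated;
//   RETURN_IF_NOT_OK(Truncate(with_cls, &truncated, 512));         // keep at most 512 elements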