• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_TEXT_DATA_UTILS_H_
17 #define MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_TEXT_DATA_UTILS_H_
18 
19 #include <memory>
20 #include <string>
21 #include <vector>
22 #include "minddata/dataset/util/status.h"
23 #include "minddata/dataset/include/dataset/constants.h"
24 #include "minddata/dataset/core/data_type.h"
25 #include "minddata/dataset/core/tensor.h"
26 #include "minddata/dataset/core/cv_tensor.h"
27 #include "minddata/dataset/core/tensor_shape.h"
28 #include "minddata/dataset/core/tensor_row.h"
29 
30 namespace mindspore {
31 namespace dataset {
32 /// \brief Helper method that performs a sliding window on the input tensor.
33 /// \param[in] input - Input tensor.
34 /// \param[in] out_shape - Shape of the output tensor.
35 /// \param[in] width - The width of the window.
36 /// \param[in] axis - The axis along which the sliding window is computed.
37 /// \param[out] output - Output tensor.
38 /// \return Status return code.
39 Status SlidingWindowHelper(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, TensorShape out_shape,
40                            uint32_t width, int32_t axis);
41 
42 /// \brief Helper method that appends offsets to the output TensorRow.
43 /// \param[in] offsets_start - Offsets start index vector.
44 /// \param[in] offsets_limit - Offsets length vector (NOTE(review): name suggests end index - confirm).
45 /// \param[out] output - Output TensorRow.
46 /// \return Status return code.
47 Status AppendOffsetsHelper(const std::vector<uint32_t> &offsets_start, const std::vector<uint32_t> &offsets_limit,
48                            TensorRow *output);
49 
50 /// \brief Helper method that adds a token to the input tensor.
51 /// \param[in] input Input tensor.
52 /// \param[in] token The token to be added.
53 /// \param[in] begin Whether to insert the token at the start (presumably when true - confirm) or the end of the sequence.
54 /// \param[out] output Output tensor.
55 /// \return Status return code.
56 Status AddToken(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const std::string &token,
57                 bool begin);
58 
59 /// \brief Truncate the input sequence so that it does not exceed the maximum length.
/// \param[in] input Input tensor.
60 /// \param[in] max_seq_len Maximum allowable length.
61 /// \param[out] output Output tensor.
/// \return Status return code.
62 Status Truncate(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int max_seq_len);
63 }  // namespace dataset
64 }  // namespace mindspore
65 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_TEXT_DATA_UTILS_H_
66