/**
 * Copyright 2020-2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "minddata/dataset/text/kernels/data_utils.h"

#include <algorithm>
#include <string>

#include "minddata/dataset/core/pybind_support.h"
#include "minddata/dataset/kernels/data/slice_op.h"
#include "minddata/dataset/kernels/data/concatenate_op.h"
#include "minddata/dataset/kernels/data/data_utils.h"

namespace mindspore {
namespace dataset {
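// Build sliding windows of `width` elements along `axis` by repeatedly slicing the input and
// concatenating the slices, then reshaping the result to `out_shape`. For example, a 1-D input
// [1, 2, 3, 4, 5] with width 3 produces the rows [1, 2, 3], [2, 3, 4] and [3, 4, 5].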
Status SlidingWindowHelper(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, TensorShape out_shape,
                           uint32_t width, int32_t axis) {
  // if the data row has fewer items than width, the corresponding result row will be empty
  if (out_shape.Size() == 0) {
    MS_LOG(WARNING) << "The data row has fewer items than width, the result will be empty.";
    return Tensor::CreateEmpty(TensorShape({0}), input->type(), output);
  }

  axis = Tensor::HandleNeg(axis, input->shape().Size());
  int32_t axis_end = input->shape()[axis];
  std::shared_ptr<Tensor> tmp;
  auto concatenate_op = std::make_unique<ConcatenateOp>(axis, nullptr, nullptr);

  // Slice on specified axis and concatenate on new axis
  for (int32_t i = 0; i + width <= axis_end; i++) {
    auto slice_op = std::make_unique<SliceOp>(Slice(i, i + width, 1));
    RETURN_IF_NOT_OK(slice_op->Compute(input, &tmp));
    if (i == 0) {
      *output = tmp;
    } else {
      TensorRow in({*output, tmp});
      TensorRow out_row;
      RETURN_IF_NOT_OK(concatenate_op->Compute(in, &out_row));
      *output = out_row[0];
    }
  }
  RETURN_IF_NOT_OK((*output)->Reshape(out_shape));
  return Status::OK();
}

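// Convert the per-token start/limit offsets into tensors and append them to the output TensorRow.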
Status AppendOffsetsHelper(const std::vector<uint32_t> &offsets_start, const std::vector<uint32_t> &offsets_limit,
                           TensorRow *output) {
  std::shared_ptr<Tensor> offsets_start_tensor, offsets_limit_tensor;
  RETURN_IF_NOT_OK(Tensor::CreateFromVector(offsets_start, &offsets_start_tensor));
  RETURN_IF_NOT_OK(Tensor::CreateFromVector(offsets_limit, &offsets_limit_tensor));

  output->push_back(offsets_start_tensor);
  output->push_back(offsets_limit_tensor);
  return Status::OK();
}

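// Add `token` to every sequence in the input: prepended when `begin` is true, appended otherwise.
// A 1-D input is handled with a single Concatenate; a 2-D input is rebuilt element by element.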
Status AddToken(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const std::string &token,
                bool begin) {
  if (input->Rank() == 1) {
    std::shared_ptr<Tensor> append;
    RETURN_IF_NOT_OK(Tensor::CreateFromVector(std::vector<std::string>({token}), &append));
    TensorRow in({input});
    TensorRow out;
    RETURN_IF_NOT_OK(Concatenate(in, &out, 0, begin ? append : nullptr, begin ? nullptr : append));
    *output = out[0];
  } else {
    // 2-D input: walk the elements in row-major order and rebuild the tensor,
    // inserting the token at the start (begin) or at the end (!begin) of every row.
    std::vector<std::string> output_vector;
    int dim = input->shape()[0];
    int shape = input->shape()[-1];
    int count = 0;
    for (auto it = input->begin<std::string_view>(); it != input->end<std::string_view>(); ++it) {
      if (count >= shape) {
        count = 0;
      }
      if (begin && count == 0) {
        output_vector.emplace_back(token);
      }
      output_vector.emplace_back(*it);
      if (!begin && count == shape - 1) {
        output_vector.emplace_back(token);
      }
      count++;
    }
    // each row is now one element longer
    shape++;
    RETURN_IF_NOT_OK(Tensor::CreateFromVector(output_vector, TensorShape({dim, shape}), output));
  }
  return Status::OK();
}

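// Keep at most `max_seq_len` elements per sequence: slice the only dimension of a 1-D input,
// or the second dimension of a 2-D input.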
Status Truncate(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int max_seq_len) {
  if (input->shape().Rank() == 1) {
    return input->Slice(output, {SliceOption(Slice(max_seq_len))});
  } else {
    int dim = input->shape()[0];
    Slice slice_dim = Slice(dim);
    Slice slice_len = Slice(max_seq_len);
    return input->Slice(output, {SliceOption(slice_dim), SliceOption(slice_len)});
  }
}
}  // namespace dataset
}  // namespace mindspore