• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "minddata/dataset/text/kernels/to_vectors_op.h"
17 
18 namespace mindspore {
19 namespace dataset {
ToVectorsOp(const std::shared_ptr<Vectors> & vectors,const std::vector<float> & unk_init,bool lower_case_backup)20 ToVectorsOp::ToVectorsOp(const std::shared_ptr<Vectors> &vectors, const std::vector<float> &unk_init,
21                          bool lower_case_backup)
22     : vectors_(vectors), unk_init_(unk_init), lower_case_backup_(lower_case_backup) {}
23 
Compute(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output)24 Status ToVectorsOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
25   IO_CHECK(input, output);
26   CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "ToVectors: input tensor type should be string.");
27   CHECK_FAIL_RETURN_UNEXPECTED(unk_init_.size() == 0 || unk_init_.size() == vectors_->Dim(),
28                                "ToVectors: unk_init must be the same length as vectors, but got unk_init: " +
29                                  std::to_string(unk_init_.size()) + " and vectors: " + std::to_string(vectors_->Dim()));
30 
31   std::vector<float> vectors_vec;
32   int len = 0;
33   for (auto itr = input->begin<std::string_view>(); itr != input->end<std::string_view>(); ++itr) {
34     std::vector<float> vectors_value = vectors_->Lookup(std::string(*itr), unk_init_, lower_case_backup_);
35     CHECK_FAIL_RETURN_UNEXPECTED(!vectors_value.empty(), "ToVectors: invalid data, token: \"" + std::string(*itr) +
36                                                            "\" doesn't exist in vectors and no unk_init is specified.");
37     vectors_vec.insert(vectors_vec.end(), vectors_value.begin(), vectors_value.end());
38     len++;
39   }
40 
41   int dim = static_cast<int>(vectors_vec.size() / len);
42   if (vectors_vec.size() == dim) {
43     RETURN_IF_NOT_OK(Tensor::CreateFromVector(vectors_vec, output));
44   } else {
45     RETURN_IF_NOT_OK(Tensor::CreateFromVector(vectors_vec, TensorShape({len, dim}), output));
46   }
47   return Status::OK();
48 }
49 
OutputType(const std::vector<DataType> & inputs,std::vector<DataType> & outputs)50 Status ToVectorsOp::OutputType(const std::vector<DataType> &inputs, std::vector<DataType> &outputs) {
51   CHECK_FAIL_RETURN_UNEXPECTED(inputs.size() == NumInput() && outputs.size() == NumOutput(),
52                                "ToVectors: input and output size don't match.");
53   CHECK_FAIL_RETURN_UNEXPECTED(inputs[0] == DataType::DE_STRING, "ToVectors: input tensor type should be string.");
54   outputs[0] = DataType(DataType::DE_FLOAT32);
55   return Status::OK();
56 }
57 }  // namespace dataset
58 }  // namespace mindspore
59