• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_TEXT_FAST_TEXT_H_
18 #define MINDSPORE_CCSRC_MINDDATA_DATASET_TEXT_FAST_TEXT_H_
19 
20 #include <memory>
21 #include <string>
22 #include <unordered_map>
23 #include <utility>
24 #include <vector>
25 
26 #include "minddata/dataset/core/tensor.h"
27 #include "minddata/dataset/include/dataset/iterator.h"
28 #include "minddata/dataset/text/vectors.h"
29 #include "minddata/dataset/util/path.h"
30 
31 namespace mindspore {
32 namespace dataset {
33 /// \brief Pre-trained FastText word vectors; derives from Vectors.
34 class FastText : public Vectors {
35  public:
36   /// \brief Default constructor (compiler-generated).
37   FastText() = default;
38 
39   /// \brief Constructor taking a string-to-vector map and the vector dimension.
40   /// \param[in] map A map from a string to its vector of floats.
41   /// \param[in] dim Dimension of the vectors.
42   FastText(const std::unordered_map<std::string, std::vector<float>> &map, int32_t dim);
43 
44   /// \brief Destructor (compiler-generated).
45   ~FastText() = default;
46 
47   /// \brief Build a FastText object by reading a pre-trained vector file. Returns a Status.
48   /// \param[out] fast_text Pointer to the shared_ptr that receives the FastText object holding the pre-trained vectors.
49   /// \param[in] path Path to the pre-trained word vector file. The file name must match `*.vec`.
50   /// \param[in] max_vectors Limits the number of pre-trained vectors loaded (default=0, no limit).
51   static Status BuildFromFile(std::shared_ptr<FastText> *fast_text, const std::string &path, int32_t max_vectors = 0);
52 };
53 }  // namespace dataset
54 }  // namespace mindspore
55 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_TEXT_FAST_TEXT_H_
56