• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_TEXT_SENTENCE_PIECE_VOCAB_H_
18 #define MINDSPORE_CCSRC_MINDDATA_DATASET_TEXT_SENTENCE_PIECE_VOCAB_H_
19 
20 #include <string>
21 #include <memory>
22 #include <vector>
23 #include <unordered_map>
24 #include "minddata/dataset/util/status.h"
25 #include "minddata/dataset/include/dataset/constants.h"
26 
27 namespace mindspore {
28 namespace dataset {
29 
30 class SentencePieceVocab {
31  public:
32   static Status BuildFromFile(const std::vector<std::string> &path_list, const int32_t vocab_size,
33                               const float character_coverage, const SentencePieceModel model_type,
34                               const std::unordered_map<std::string, std::string> &params,
35                               std::shared_ptr<SentencePieceVocab> *vocab);
36   static Status SaveModel(const std::shared_ptr<SentencePieceVocab> *vocab, std::string path, std::string filename);
37   SentencePieceVocab();
38 
39   ~SentencePieceVocab() = default;
40 
41   const std::string &model_proto();
42 
43   void set_model_proto(const std::string model_proto);
44 
45  private:
46   std::string model_proto_;
47 };
48 }  // namespace dataset
49 }  // namespace mindspore
50 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_TEXT_SENTENCE_PIECE_VOCAB_H_
51