/third_party/mindspore/mindspore/dataset/text/ |
D | utils.py | 334 class SentencePieceModel(IntEnum): class 355 SentencePieceModel.UNIGRAM: cde.SentencePieceModel.DE_SENTENCE_PIECE_UNIGRAM, 356 SentencePieceModel.BPE: cde.SentencePieceModel.DE_SENTENCE_PIECE_BPE, 357 SentencePieceModel.CHAR: cde.SentencePieceModel.DE_SENTENCE_PIECE_CHAR, 358 SentencePieceModel.WORD: cde.SentencePieceModel.DE_SENTENCE_PIECE_WORD
|
D | __init__.py | 30 …import to_str, to_bytes, JiebaMode, Vocab, NormalizeForm, SentencePieceVocab, SentencePieceModel, \
|
D | validators.py | 460 from .utils import SentencePieceModel 461 type_check(model_type, (str, SentencePieceModel), "model_type") 488 from .utils import SentencePieceModel 489 type_check(model_type, (str, SentencePieceModel), "model_type")
|
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/text/ |
D | bindings.cc | 57 const SentencePieceModel model_type, const py::dict &params) { in __anonb29b31e90502() 82 PYBIND_REGISTER(SentencePieceModel, 0, ([](const py::module *m) { in __anonb29b31e90802() 83 (void)py::enum_<SentencePieceModel>(*m, "SentencePieceModel", py::arithmetic()) in __anonb29b31e90802() 84 .value("DE_SENTENCE_PIECE_UNIGRAM", SentencePieceModel::kUnigram) in __anonb29b31e90802() 85 .value("DE_SENTENCE_PIECE_BPE", SentencePieceModel::kBpe) in __anonb29b31e90802() 86 .value("DE_SENTENCE_PIECE_CHAR", SentencePieceModel::kChar) in __anonb29b31e90802() 87 .value("DE_SENTENCE_PIECE_WORD", SentencePieceModel::kWord) in __anonb29b31e90802()
|
/third_party/mindspore/tests/ut/python/dataset/ |
D | test_sentencepiece_tokenizer.py | 19 from mindspore.dataset.text import SentencePieceModel, to_str, SPieceTokenizerOutType 26 …vocab = text.SentencePieceVocab.from_file([VOCAB_FILE], 5000, 0.9995, SentencePieceModel.UNIGRAM, … 33 …vocab = text.SentencePieceVocab.from_file([VOCAB_FILE], 5000, 0.9995, SentencePieceModel.UNIGRAM, … 45 … vocab = text.SentencePieceVocab.from_file([VOCAB_FILE], 5000, 0.9995, SentencePieceModel.BPE, {}) 57 … vocab = text.SentencePieceVocab.from_file([VOCAB_FILE], 5000, 0.9995, SentencePieceModel.CHAR, {}) 70 … vocab = text.SentencePieceVocab.from_file([VOCAB_FILE], 5000, 0.9995, SentencePieceModel.WORD, {}) 82 …vocab = text.SentencePieceVocab.from_file([VOCAB_FILE], 5000, 0.9995, SentencePieceModel.UNIGRAM, … 94 …vocab = text.SentencePieceVocab.from_file([VOCAB_FILE], 5000, 0.9995, SentencePieceModel.UNIGRAM, … 107 …vocab = text.SentencePieceVocab.from_file([VOCAB_FILE], 5000, 0.9995, SentencePieceModel.UNIGRAM, … 121 …vocab = text.SentencePieceVocab.from_dataset(data, ["text"], 5000, 0.9995, SentencePieceModel.UNIG… [all …]
|
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/ |
D | build_sentence_piece_vocab_node.cc | 33 … float character_coverage, SentencePieceModel model_type, in BuildSentenceVocabNode() 73 if (model_type_ != SentencePieceModel::kUnigram && model_type_ != SentencePieceModel::kBpe && in ValidateParams() 74 model_type_ != SentencePieceModel::kChar && model_type_ != SentencePieceModel::kWord) { in ValidateParams()
|
D | build_sentence_piece_vocab_node.h | 37 … SentencePieceModel model_type, const std::unordered_map<std::string, std::string> &params); 80 SentencePieceModel ModelType() const { return model_type_; } in ModelType() 88 SentencePieceModel model_type_;
|
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/text/ |
D | sentence_piece_vocab.cc | 34 … const float character_coverage, const SentencePieceModel model_type, in BuildFromFile() 54 if (model_type == SentencePieceModel::kWord) { in BuildFromFile() 56 } else if (model_type == SentencePieceModel::kBpe) { in BuildFromFile() 58 } else if (model_type == SentencePieceModel::kChar) { in BuildFromFile()
|
D | sentence_piece_vocab.h | 33 const float character_coverage, const SentencePieceModel model_type,
|
/third_party/mindspore/tests/ut/cpp/dataset/ |
D | c_api_text_sentence_piece_vocab_test.cc | 29 using mindspore::dataset::SentencePieceModel; 48 ds_vocab->BuildSentencePieceVocab({}, 5000, 0.9995, SentencePieceModel::kUnigram, {}); in TEST_F() 107 ds_vocab->BuildSentencePieceVocab({}, 5000, 0.9995, SentencePieceModel::kUnigram, {}); in TEST_F() 170 ds_vocab->BuildSentencePieceVocab({}, 0, 0.9995, SentencePieceModel::kUnigram, {}); in TEST_F() 175 ds_vocab->BuildSentencePieceVocab({}, 1, 0.979, SentencePieceModel::kUnigram, {}); in TEST_F() 180 ds_vocab->BuildSentencePieceVocab({}, 1, 1.01, SentencePieceModel::kUnigram, {}); in TEST_F() 185 ds_vocab->BuildSentencePieceVocab({"image"}, 2, 0.98, SentencePieceModel::kUnigram, {}); in TEST_F()
|
D | sentence_piece_vocab_op_test.cc | 79 std::move(spm), cols, 5000, 0.9995, SentencePieceModel::kUnigram, m_params, 2); in TEST_F() 117 …Status rc = SentencePieceVocab::BuildFromFile(path_list, 5000, 0.9995, SentencePieceModel::kUnigra… in TEST_F() 137 spm, cols, 5000, 0.9995, SentencePieceModel::kUnigram, m_params, 2); in TEST_F()
|
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/engine/datasetops/ |
D | build_sentence_piece_vocab_op.cc | 26 … float character_coverage, SentencePieceModel model_type, in BuildSentencePieceVocabOp() 105 if (model_type_ == SentencePieceModel::kBpe) { in BuildParams() 107 } else if (model_type_ == SentencePieceModel::kChar) { in BuildParams() 109 } else if (model_type_ == SentencePieceModel::kWord) { in BuildParams()
|
D | build_sentence_piece_vocab_op.h | 58 … int32_t vocab_size, float character_coverage, SentencePieceModel model_type, 95 SentencePieceModel model_type_;
|
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/include/dataset/ |
D | constants.h | 151 enum class SentencePieceModel { enum
|
D | datasets.h | 75 enum class SentencePieceModel; variable 248 SentencePieceModel model_type, const std::unordered_map<std::string, std::string> &params) { in BuildSentencePieceVocab() 473 SentencePieceModel model_type, const std::map<std::vector<char>, std::vector<char>> &params);
|
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/engine/ir/datasetops/ |
D | bindings.cc | 145 float character_coverage, SentencePieceModel model_type, in __anon4440814b0c02()
|
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/api/ |
D | datasets.cc | 554 SentencePieceModel model_type, const std::map<std::vector<char>, std::vector<char>> &params) { in BuildSentencePieceVocabCharIF()
|