/third_party/mindspore/mindspore/ccsrc/minddata/dataset/text/ |
D | vocab.cc | 42 Status Vocab::BuildFromPyList(const py::list &words, const py::list &special_tokens, bool prepend_s… in BuildFromPyList() argument 51 WordIdType word_id = prepend_special ? static_cast<WordIdType>(special_tokens.size()) : 0; in BuildFromPyList() 59 for (auto special_token : special_tokens) { in BuildFromPyList() 103 …b::BuildFromVector(const std::vector<WordType> &words, const std::vector<WordType> &special_tokens, in BuildFromVector() argument 111 WordIdType word_id = prepend_special ? static_cast<WordIdType>(special_tokens.size()) : 0; in BuildFromVector() 121 for (auto special_token : special_tokens) { in BuildFromVector() 136 const std::vector<WordType> &special_tokens, bool prepend_special, in BuildFromFileCpp() argument 150 for (const WordType &sp : special_tokens) { in BuildFromFileCpp() 151 if (std::count(special_tokens.begin(), special_tokens.end(), sp) > 1) { in BuildFromFileCpp() 162 for (auto word : special_tokens) { in BuildFromFileCpp() [all …]
|
D | vocab.h | 53 …static Status BuildFromPyList(const py::list &words, const py::list &special_tokens, bool prepend_… 63 … const py::list &special_tokens, bool prepend_special, std::shared_ptr<Vocab> *vocab); 79 …us BuildFromVector(const std::vector<WordType> &words, const std::vector<WordType> &special_tokens, 91 const std::vector<WordType> &special_tokens, bool prepend_special,
|
/third_party/mindspore/mindspore/dataset/text/ |
D | utils.py | 41 …def from_dataset(cls, dataset, columns=None, freq_range=None, top_k=None, special_tokens=None, spe… argument 77 return dataset.build_vocab(columns, freq_range, top_k, special_tokens, special_first) 81 def from_list(cls, word_list, special_tokens=None, special_first=True): argument 98 if special_tokens is None: 99 special_tokens = [] 100 return super().from_list(word_list, special_tokens, special_first) 104 …def from_file(cls, file_path, delimiter="", vocab_size=None, special_tokens=None, special_first=Tr… argument 127 if special_tokens is None: 128 special_tokens = [] 129 return super().from_file(file_path, delimiter, vocab_size, special_tokens, special_first)
|
D | validators.py | 65 …[file_path, delimiter, vocab_size, special_tokens, special_first], _ = parse_user_args(method, *ar… 67 if special_tokens is not None: 68 check_unique_list_of_words(special_tokens, "special_tokens") 84 [word_list, special_tokens, special_first], _ = parse_user_args(method, *args, **kwargs) 87 if special_tokens is not None: 88 token_set = check_unique_list_of_words(special_tokens, "special_tokens") 310 … [_, columns, freq_range, top_k, special_tokens, special_first], _ = parse_user_args(method, *args, 338 if special_tokens is not None: 339 check_unique_list_of_words(special_tokens, "special_tokens")
|
/third_party/mindspore/tests/ut/python/dataset/ |
D | test_from_dataset.py | 27 special_tokens=["<pad>", "<unk>"], 40 …vocab = text.Vocab.from_dataset(data, None, freq_range=None, top_k=None, special_tokens=["<pad>", … 61 …vocab = text.Vocab.from_dataset(corpus_dataset, None, freq_range, top_k, special_tokens=["<pad>", … 107 def test_config(texts, top_k, special_tokens, special_first): argument 109 … vocab = text.Vocab.from_dataset(corpus_dataset, None, None, top_k, special_tokens, special_first)
|
D | test_vocab.py | 89 def test_config(lookup_str, vocab_input, special_tokens, special_first, unknown_token): argument 91 vocab = text.Vocab.from_list(vocab_input, special_tokens, special_first) 152 def test_config(lookup_str, vocab_size, special_tokens, special_first): argument 154 …cab = text.Vocab.from_file(SIMPLE_VOCAB_FILE, vocab_size=vocab_size, special_tokens=special_tokens, 185 … vocab = text.Vocab.from_list(["w1", "w2", "w3"], special_tokens=["<unk>"], special_first=True)
|
D | test_nlp.py | 36 vocab = text.Vocab.from_file(VOCAB_FILE, ",", special_tokens=["<pad>", "<unk>"])
|
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/text/ |
D | bindings.cc | 32 … [](const py::list &words, const py::list &special_tokens, bool special_first) { in __anonb29b31e90102() argument 34 … THROW_IF_ERROR(Vocab::BuildFromPyList(words, special_tokens, special_first, &v)); in __anonb29b31e90102() 40 const py::list &special_tokens, bool special_first) { in __anonb29b31e90102() argument 42 … THROW_IF_ERROR(Vocab::BuildFromFile(path, dlm, vocab_size, special_tokens, special_first, &v)); in __anonb29b31e90102()
|
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/ |
D | build_vocab_node.cc | 32 … int64_t top_k, const std::vector<std::string> &special_tokens, bool special_first) in BuildVocabNode() argument 37 special_tokens_(special_tokens), in BuildVocabNode()
|
D | build_vocab_node.h | 35 const std::vector<std::string> &special_tokens, bool special_first);
|
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/engine/ir/datasetops/ |
D | bindings.cc | 158 … py::tuple freq_range, int64_t top_k, py::list special_tokens, bool special_first) { in __anon4440814b0e02() argument 161 … top_k, toStringVector(special_tokens), special_first); in __anon4440814b0e02()
|
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/include/dataset/ |
D | datasets.h | 268 … int64_t top_k = kDeMaxTopk, const std::vector<std::string> &special_tokens = {}, 270 …uildVocabCharIF(VectorStringToChar(columns), freq_range, top_k, VectorStringToChar(special_tokens), 478 … const std::vector<std::vector<char>> &special_tokens, bool special_first);
|
/third_party/mindspore/mindspore/dataset/engine/ |
D | datasets.py | 1108 def build_vocab(self, columns, freq_range, top_k, special_tokens, special_first): argument 1157 special_tokens = replace_none(special_tokens, []) 1163 …vocab_node = cde.BuildVocabNode(ir_tree, vocab, columns, freq_range, top_k, special_tokens, specia…
|
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/api/ |
D | datasets.cc | 590 … const std::vector<std::vector<char>> &special_tokens, in BuildVocabCharIF() argument 594 VectorCharToString(special_tokens), special_first); in BuildVocabCharIF()
|