Home
last modified time | relevance | path

Searched refs:special_tokens (Results 1 – 14 of 14) sorted by relevance

/third_party/mindspore/mindspore/ccsrc/minddata/dataset/text/
Dvocab.cc42 Status Vocab::BuildFromPyList(const py::list &words, const py::list &special_tokens, bool prepend_s… in BuildFromPyList() argument
51 WordIdType word_id = prepend_special ? static_cast<WordIdType>(special_tokens.size()) : 0; in BuildFromPyList()
59 for (auto special_token : special_tokens) { in BuildFromPyList()
103 …b::BuildFromVector(const std::vector<WordType> &words, const std::vector<WordType> &special_tokens, in BuildFromVector() argument
111 WordIdType word_id = prepend_special ? static_cast<WordIdType>(special_tokens.size()) : 0; in BuildFromVector()
121 for (auto special_token : special_tokens) { in BuildFromVector()
136 const std::vector<WordType> &special_tokens, bool prepend_special, in BuildFromFileCpp() argument
150 for (const WordType &sp : special_tokens) { in BuildFromFileCpp()
151 if (std::count(special_tokens.begin(), special_tokens.end(), sp) > 1) { in BuildFromFileCpp()
162 for (auto word : special_tokens) { in BuildFromFileCpp()
[all …]
Dvocab.h53 …static Status BuildFromPyList(const py::list &words, const py::list &special_tokens, bool prepend_…
63 … const py::list &special_tokens, bool prepend_special, std::shared_ptr<Vocab> *vocab);
79 …us BuildFromVector(const std::vector<WordType> &words, const std::vector<WordType> &special_tokens,
91 const std::vector<WordType> &special_tokens, bool prepend_special,
/third_party/mindspore/mindspore/dataset/text/
Dutils.py41 …def from_dataset(cls, dataset, columns=None, freq_range=None, top_k=None, special_tokens=None, spe… argument
77 return dataset.build_vocab(columns, freq_range, top_k, special_tokens, special_first)
81 def from_list(cls, word_list, special_tokens=None, special_first=True): argument
98 if special_tokens is None:
99 special_tokens = []
100 return super().from_list(word_list, special_tokens, special_first)
104 …def from_file(cls, file_path, delimiter="", vocab_size=None, special_tokens=None, special_first=Tr… argument
127 if special_tokens is None:
128 special_tokens = []
129 return super().from_file(file_path, delimiter, vocab_size, special_tokens, special_first)
Dvalidators.py65 …[file_path, delimiter, vocab_size, special_tokens, special_first], _ = parse_user_args(method, *ar…
67 if special_tokens is not None:
68 check_unique_list_of_words(special_tokens, "special_tokens")
84 [word_list, special_tokens, special_first], _ = parse_user_args(method, *args, **kwargs)
87 if special_tokens is not None:
88 token_set = check_unique_list_of_words(special_tokens, "special_tokens")
310 … [_, columns, freq_range, top_k, special_tokens, special_first], _ = parse_user_args(method, *args,
338 if special_tokens is not None:
339 check_unique_list_of_words(special_tokens, "special_tokens")
/third_party/mindspore/tests/ut/python/dataset/
Dtest_from_dataset.py27 special_tokens=["<pad>", "<unk>"],
40 …vocab = text.Vocab.from_dataset(data, None, freq_range=None, top_k=None, special_tokens=["<pad>", …
61 …vocab = text.Vocab.from_dataset(corpus_dataset, None, freq_range, top_k, special_tokens=["<pad>", …
107 def test_config(texts, top_k, special_tokens, special_first): argument
109 … vocab = text.Vocab.from_dataset(corpus_dataset, None, None, top_k, special_tokens, special_first)
Dtest_vocab.py89 def test_config(lookup_str, vocab_input, special_tokens, special_first, unknown_token): argument
91 vocab = text.Vocab.from_list(vocab_input, special_tokens, special_first)
152 def test_config(lookup_str, vocab_size, special_tokens, special_first): argument
154 …cab = text.Vocab.from_file(SIMPLE_VOCAB_FILE, vocab_size=vocab_size, special_tokens=special_tokens,
185 … vocab = text.Vocab.from_list(["w1", "w2", "w3"], special_tokens=["<unk>"], special_first=True)
Dtest_nlp.py36 vocab = text.Vocab.from_file(VOCAB_FILE, ",", special_tokens=["<pad>", "<unk>"])
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/text/
Dbindings.cc32 … [](const py::list &words, const py::list &special_tokens, bool special_first) { in __anonb29b31e90102() argument
34 … THROW_IF_ERROR(Vocab::BuildFromPyList(words, special_tokens, special_first, &v)); in __anonb29b31e90102()
40 const py::list &special_tokens, bool special_first) { in __anonb29b31e90102() argument
42 … THROW_IF_ERROR(Vocab::BuildFromFile(path, dlm, vocab_size, special_tokens, special_first, &v)); in __anonb29b31e90102()
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/
Dbuild_vocab_node.cc32 … int64_t top_k, const std::vector<std::string> &special_tokens, bool special_first) in BuildVocabNode() argument
37 special_tokens_(special_tokens), in BuildVocabNode()
Dbuild_vocab_node.h35 const std::vector<std::string> &special_tokens, bool special_first);
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/engine/ir/datasetops/
Dbindings.cc158 … py::tuple freq_range, int64_t top_k, py::list special_tokens, bool special_first) { in __anon4440814b0e02() argument
161 … top_k, toStringVector(special_tokens), special_first); in __anon4440814b0e02()
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/include/dataset/
Ddatasets.h268 … int64_t top_k = kDeMaxTopk, const std::vector<std::string> &special_tokens = {},
270 …uildVocabCharIF(VectorStringToChar(columns), freq_range, top_k, VectorStringToChar(special_tokens),
478 … const std::vector<std::vector<char>> &special_tokens, bool special_first);
/third_party/mindspore/mindspore/dataset/engine/
Ddatasets.py1108 def build_vocab(self, columns, freq_range, top_k, special_tokens, special_first): argument
1157 special_tokens = replace_none(special_tokens, [])
1163 …vocab_node = cde.BuildVocabNode(ir_tree, vocab, columns, freq_range, top_k, special_tokens, specia…
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/api/
Ddatasets.cc590 … const std::vector<std::vector<char>> &special_tokens, in BuildVocabCharIF() argument
594 VectorCharToString(special_tokens), special_first); in BuildVocabCharIF()