/third_party/mindspore/tests/ut/cpp/dataset/ |
D | build_vocab_test.cc | 42 std::shared_ptr<Vocab> vocab = std::make_shared<Vocab>(); in TEST_F() local 59 std::shared_ptr<Vocab> vocab = std::make_shared<Vocab>(); in TEST_F() local 81 std::shared_ptr<Vocab> vocab = std::make_shared<Vocab>(); in TEST_F() local 90 std::shared_ptr<Vocab> vocab = std::make_shared<Vocab>(); in TEST_F() local 108 std::shared_ptr<Vocab> vocab = std::make_shared<Vocab>(); in TEST_F() local 126 std::shared_ptr<Vocab> vocab = std::make_shared<Vocab>(); in TEST_F() local 143 std::shared_ptr<Vocab> vocab = std::make_shared<Vocab>(); in TEST_F() local 162 std::shared_ptr<Vocab> vocab = std::make_shared<Vocab>(); in TEST_F() local 174 std::shared_ptr<Vocab> vocab = std::make_shared<Vocab>(); in TEST_F() local 186 std::shared_ptr<Vocab> vocab = std::make_shared<Vocab>(); in TEST_F() local [all …]
|
D | c_api_text_vocab_test.cc | 54 std::shared_ptr<Vocab> vocab = std::make_shared<Vocab>(); in TEST_F() local 108 std::shared_ptr<Vocab> vocab = std::make_shared<Vocab>(); in TEST_F() local 163 std::shared_ptr<Vocab> vocab = std::make_shared<Vocab>(); in TEST_F() local 210 std::shared_ptr<Vocab> vocab = std::make_shared<Vocab>(); in TEST_F() local 236 std::shared_ptr<Vocab> vocab; in TEST_F() local 261 std::shared_ptr<Vocab> vocab = std::make_shared<Vocab>(); in TEST_F() local 288 std::shared_ptr<Vocab> vocab = ds->BuildVocab({"text"}, {0, std::numeric_limits<int64_t>::max()}, in TEST_F() local 345 std::shared_ptr<Vocab> vocab = ds->BuildVocab(); in TEST_F() local 410 std::shared_ptr<Vocab> vocab = in TEST_F() local 425 std::shared_ptr<Vocab> vocab = in TEST_F() local [all …]
|
D | c_api_text_sentence_piece_vocab_test.cc | 47 std::shared_ptr<SentencePieceVocab> vocab = in TEST_F() local 106 std::shared_ptr<SentencePieceVocab> vocab = in TEST_F() local
|
D | c_api_text_test.cc | 241 std::shared_ptr<Vocab> vocab = std::make_shared<Vocab>(); in TEST_F() local 304 std::shared_ptr<Vocab> vocab = std::make_shared<Vocab>(); in TEST_F() local 365 std::shared_ptr<Vocab> vocab = std::make_shared<Vocab>(); in TEST_F() local 427 std::shared_ptr<Vocab> vocab = std::make_shared<Vocab>(); in TEST_F() local 487 std::shared_ptr<Vocab> vocab = std::make_shared<Vocab>(); in TEST_F() local 547 std::shared_ptr<Vocab> vocab = std::make_shared<Vocab>(); in TEST_F() local 607 std::shared_ptr<Vocab> vocab = std::make_shared<Vocab>(); in TEST_F() local 702 std::shared_ptr<Vocab> vocab = std::make_shared<Vocab>(); in TEST_F() local 2793 std::shared_ptr<Vocab> vocab = std::make_shared<Vocab>(); in TEST_F() local 2849 std::shared_ptr<Vocab> vocab = std::make_shared<Vocab>(); in TEST_F() local [all …]
|
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/text/ |
D | vocab.cc | 43 std::shared_ptr<Vocab> *vocab) { in BuildFromPyList() 67 Status Vocab::BuildFromPyDict(const py::dict &words, std::shared_ptr<Vocab> *vocab) { in BuildFromPyDict() 87 std::shared_ptr<Vocab> *vocab) { in BuildFromUnorderedMap() 104 bool prepend_special, std::shared_ptr<Vocab> *vocab) { in BuildFromVector() 137 std::shared_ptr<Vocab> *vocab) { in BuildFromFileCpp() 198 … const py::list &special_tokens, bool prepend_special, std::shared_ptr<Vocab> *vocab) { in BuildFromFile()
|
D | sentence_piece_vocab.cc | 36 std::shared_ptr<SentencePieceVocab> *vocab) { in BuildFromFile() 89 Status SentencePieceVocab::SaveModel(const std::shared_ptr<SentencePieceVocab> *vocab, std::string … in SaveModel()
|
D | vocab.h | 111 const std::unordered_map<WordType, WordIdType> vocab() { return word2id_; } in vocab() function
|
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/text/kernels/ |
D | bert_tokenizer_op.h | 42 … : wordpiece_tokenizer_(vocab, suffix_indicator, max_bytes_per_token, unknown_token, with_offsets), in wordpiece_tokenizer_() argument
|
D | lookup_op.cc | 22 LookupOp::LookupOp(std::shared_ptr<Vocab> vocab, WordIdType default_id, const DataType &data_type) in LookupOp()
|
D | sentence_piece_tokenizer_op.cc | 27 SentencePieceTokenizerOp::SentencePieceTokenizerOp(const std::shared_ptr<SentencePieceVocab> vocab, in SentencePieceTokenizerOp()
|
D | wordpiece_tokenizer_op.cc | 29 WordpieceTokenizerOp::WordpieceTokenizerOp(const std::shared_ptr<Vocab> &vocab, const std::string &… in WordpieceTokenizerOp()
|
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/text/kernels/ir/ |
D | bindings.cc | 48 bool with_offsets) { in __anonf0f064540302() 155 … const std::optional<std::string> &unknown_token, const std::string &data_type) { in __anonf0f064541502() 178 ….def(py::init([](const std::shared_ptr<SentencePieceVocab> &vocab, SPieceTokenizerOutType out_type… in __anonf0f064541902() 238 … int32_t max_bytes_per_token, const std::string &unknown_token, bool with_offsets) { in __anonf0f064542402()
|
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/text/ir/kernels/ |
D | text_ir.cc | 86 BertTokenizerOperation::BertTokenizerOperation(const std::shared_ptr<Vocab> &vocab, const std::stri… in BertTokenizerOperation() 193 LookupOperation::LookupOperation(const std::shared_ptr<Vocab> &vocab, const std::optional<std::stri… in LookupOperation() 198 LookupOperation::LookupOperation(const std::shared_ptr<Vocab> &vocab, const std::optional<std::stri… in LookupOperation() 334 …TokenizerOperation::SentencePieceTokenizerOperation(const std::shared_ptr<SentencePieceVocab> &voc… in SentencePieceTokenizerOperation() 474 WordpieceTokenizerOperation::WordpieceTokenizerOperation(const std::shared_ptr<Vocab> &vocab, in WordpieceTokenizerOperation()
|
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/include/dataset/ |
D | text.h | 98 …: BertTokenizer(vocab, StringToChar(suffix_indicator), max_bytes_per_token, StringToChar(unknown_t… in BertTokenizer() argument 537 …: WordpieceTokenizer(vocab, StringToChar(suffix_indicator), max_bytes_per_token, StringToChar(unkn… in WordpieceTokenizer() argument
|
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/ |
D | build_vocab_node.cc | 30 BuildVocabNode::BuildVocabNode(std::shared_ptr<DatasetNode> child, std::shared_ptr<Vocab> vocab, in BuildVocabNode()
|
D | build_sentence_piece_vocab_node.cc | 31 std::shared_ptr<SentencePieceVocab> vocab, in BuildSentenceVocabNode()
|
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/engine/datasetops/ |
D | build_sentence_piece_vocab_op.cc | 24 BuildSentencePieceVocabOp::BuildSentencePieceVocabOp(std::shared_ptr<SentencePieceVocab> vocab, in BuildSentencePieceVocabOp()
|
D | build_vocab_op.cc | 30 BuildVocabOp::BuildVocabOp(std::shared_ptr<Vocab> vocab, std::vector<std::string> col_names, in BuildVocabOp()
|
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/text/ |
D | bindings.cc | 77 std::string filename) { in __anonf58a86e70502()
|
/third_party/mindspore/mindspore/dataset/text/ |
D | transforms.py | 277 def __init__(self, vocab, unknown_token=None, data_type=mstype.int32): argument 513 def __init__(self, vocab, suffix_indicator='##', max_bytes_per_token=100, argument 691 … def __init__(self, vocab, suffix_indicator='##', max_bytes_per_token=100, unknown_token='[UNK]', argument
|
D | utils.py | 239 def save_model(cls, vocab, path, filename): argument
|
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/api/ |
D | text.cc | 93 BertTokenizer::BertTokenizer(const std::shared_ptr<Vocab> &vocab, const std::vector<char> &suffix_i… in BertTokenizer() 232 Lookup::Lookup(const std::shared_ptr<Vocab> &vocab, const std::optional<std::vector<char>> &unknown… in Lookup() 325 SentencePieceTokenizer::SentencePieceTokenizer(const std::shared_ptr<SentencePieceVocab> &vocab, in SentencePieceTokenizer() 404 WordpieceTokenizer::WordpieceTokenizer(const std::shared_ptr<Vocab> &vocab, const std::vector<char>… in WordpieceTokenizer()
|
D | datasets.cc | 555 auto vocab = std::make_shared<SentencePieceVocab>(); in BuildSentencePieceVocabCharIF() local 592 auto vocab = std::make_shared<Vocab>(); in BuildVocabCharIF() local
|
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/engine/ir/datasetops/ |
D | bindings.cc | 146 const std::unordered_map<std::string, std::string> &params) { in __anonb78612490c02() 158 … py::tuple freq_range, int64_t top_k, py::list special_tokens, bool special_first) { in __anonb78612490e02()
|
/third_party/mindspore/tests/st/fl/albert/src/ |
D | tokenization.py | 290 def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=100): argument
|