/third_party/mindspore/mindspore/ccsrc/minddata/dataset/text/kernels/ |
D | normalize_utf8_op.cc | 26 const NormalizeForm NormalizeUTF8Op::kDefNormalizeForm = NormalizeForm::kNfkc; 34 case NormalizeForm::kNone: { in Compute() 38 case NormalizeForm::kNfc: { in Compute() 43 case NormalizeForm::kNfkc: { in Compute() 48 case NormalizeForm::kNfd: { in Compute() 53 case NormalizeForm::kNfkd: { in Compute()
|
D | normalize_utf8_op.h | 30 static const NormalizeForm kDefNormalizeForm; 31 …explicit NormalizeUTF8Op(NormalizeForm normalize_form = kDefNormalizeForm) : normalize_form_(norma… in normalize_form_() 40 NormalizeForm normalize_form_;
|
D | basic_tokenizer_op.h | 38 static const NormalizeForm kDefNormalizationForm; 42 const NormalizeForm &normalization_form = kDefNormalizationForm, 63 NormalizeForm normalization_form_;
|
D | basic_tokenizer_op.cc | 32 const NormalizeForm BasicTokenizerOp::kDefNormalizationForm = NormalizeForm::kNone; 52 … const NormalizeForm &normalization_form, const bool &preserve_unused_token, in BasicTokenizerOp() 60 nfd_normalize_(std::make_unique<NormalizeUTF8Op>(NormalizeForm::kNfd)), in BasicTokenizerOp()
|
D | bert_tokenizer_op.h | 39 … const NormalizeForm &normalization_form = BasicTokenizerOp::kDefNormalizationForm,
|
/third_party/mindspore/mindspore/dataset/text/ |
D | transforms.py | 50 from .utils import JiebaMode, NormalizeForm, to_str, SPieceTokenizerOutType, SPieceTokenizerLoadType 559 NormalizeForm.NONE: cde.NormalizeForm.DE_NORMALIZE_NONE, 560 NormalizeForm.NFC: cde.NormalizeForm.DE_NORMALIZE_NFC, 561 NormalizeForm.NFKC: cde.NormalizeForm.DE_NORMALIZE_NFKC, 562 NormalizeForm.NFD: cde.NormalizeForm.DE_NORMALIZE_NFD, 563 NormalizeForm.NFKD: cde.NormalizeForm.DE_NORMALIZE_NFKD 620 … def __init__(self, lower_case=False, keep_whitespace=False, normalization_form=NormalizeForm.NONE, 622 if not isinstance(normalization_form, NormalizeForm): 692 lower_case=False, keep_whitespace=False, normalization_form=NormalizeForm.NONE, 694 if not isinstance(normalization_form, NormalizeForm): [all …]
|
D | __init__.py | 30 from .utils import to_str, to_bytes, JiebaMode, Vocab, NormalizeForm, SentencePieceVocab, SentenceP…
|
D | utils.py | 314 class NormalizeForm(IntEnum): class
|
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/text/ir/kernels/ |
D | text_ir.cc | 60 … const NormalizeForm normalize_form, bool preserve_unused_token, in BasicTokenizerOperation() 69 if (normalize_form_ != NormalizeForm::kNone && normalize_form_ != NormalizeForm::kNfc && in ValidateParams() 70 normalize_form_ != NormalizeForm::kNfkc && normalize_form_ != NormalizeForm::kNfd && in ValidateParams() 71 normalize_form_ != NormalizeForm::kNfkd) { in ValidateParams() 89 … const NormalizeForm normalize_form, bool preserve_unused_token, in BertTokenizerOperation() 110 if (normalize_form_ != NormalizeForm::kNone && normalize_form_ != NormalizeForm::kNfc && in ValidateParams() 111 normalize_form_ != NormalizeForm::kNfkc && normalize_form_ != NormalizeForm::kNfd && in ValidateParams() 112 normalize_form_ != NormalizeForm::kNfkd) { in ValidateParams() 288 NormalizeUTF8Operation::NormalizeUTF8Operation(NormalizeForm normalize_form) : normalize_form_(norm… in NormalizeUTF8Operation() 291 if (normalize_form_ != NormalizeForm::kNone && normalize_form_ != NormalizeForm::kNfc && in ValidateParams() [all …]
|
D | text_ir.h | 59 BasicTokenizerOperation(bool lower_case, bool keep_whitespace, const NormalizeForm normalize_form, 73 NormalizeForm normalize_form_; 82 … bool keep_whitespace, const NormalizeForm normalize_form, bool preserve_unused_token, 100 NormalizeForm normalize_form_; 187 explicit NormalizeUTF8Operation(NormalizeForm normalize_form); 198 NormalizeForm normalize_form_;
|
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/text/kernels/ir/ |
D | bindings.cc | 32 .def(py::init([](bool lower_case, bool keep_whitespace, const NormalizeForm normalize_form, in __anon5546ea160102() 47 … bool keep_whitespace, const NormalizeForm normalize_form, bool preserve_unused_token, in __anon5546ea160302() 71 .def(py::init([](NormalizeForm normalize_form) { in __anon5546ea160702() 125 PYBIND_REGISTER(NormalizeForm, 0, ([](const py::module *m) { in __anon5546ea161102() 126 (void)py::enum_<NormalizeForm>(*m, "NormalizeForm", py::arithmetic()) in __anon5546ea161102() 127 .value("DE_NORMALIZE_NONE", NormalizeForm::kNone) in __anon5546ea161102() 128 .value("DE_NORMALIZE_NFC", NormalizeForm::kNfc) in __anon5546ea161102() 129 .value("DE_NORMALIZE_NFKC", NormalizeForm::kNfkc) in __anon5546ea161102() 130 .value("DE_NORMALIZE_NFD", NormalizeForm::kNfd) in __anon5546ea161102() 131 .value("DE_NORMALIZE_NFKD", NormalizeForm::kNfkd) in __anon5546ea161102()
|
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/include/dataset/ |
D | text.h | 57 … const NormalizeForm normalize_form = NormalizeForm::kNone, bool preserve_unused_token = true, 96 … const NormalizeForm normalize_form = NormalizeForm::kNone, bool preserve_unused_token = true, 118 … bool keep_whitespace, const NormalizeForm normalize_form, bool preserve_unused_token, 328 explicit NormalizeUTF8(NormalizeForm normalize_form = NormalizeForm::kNfkc);
|
D | constants.h | 159 enum class NormalizeForm { enum
|
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/api/ |
D | text.cc | 45 …Data(bool lower_case, bool keep_whitespace, const NormalizeForm normalize_form, bool preserve_unus… in Data() 54 NormalizeForm normalize_form_; 59 BasicTokenizer::BasicTokenizer(bool lower_case, bool keep_whitespace, const NormalizeForm normalize… in BasicTokenizer() 72 const NormalizeForm normalize_form, bool preserve_unused_token, bool with_offsets) in Data() 88 NormalizeForm normalize_form_; 95 … bool keep_whitespace, const NormalizeForm normalize_form, bool preserve_unused_token, in BertTokenizer() 267 explicit Data(NormalizeForm normalize_form) : normalize_form_(normalize_form) {} in Data() 268 NormalizeForm normalize_form_; 271 NormalizeUTF8::NormalizeUTF8(NormalizeForm normalize_form) : data_(std::make_shared<Data>(normalize… in NormalizeUTF8()
|
/third_party/mindspore/tests/ut/python/dataset/ |
D | test_text_bert_tokenizer.py | 81 normalization_form=text.utils.NormalizeForm.NFKC, 175 normalization_form=text.utils.NormalizeForm.NONE, 204 normalization_form=text.utils.NormalizeForm.NONE,
|
D | test_text_basic_tokenizer.py | 71 … normalization_form=text.utils.NormalizeForm.NONE, preserve_unused_token=False): 95 … normalization_form=text.utils.NormalizeForm.NONE, preserve_unused_token=False):
|
D | test_text_tokenizer.py | 269 assert normalize(text.utils.NormalizeForm.NFC) == expect_normlize_data[0] 270 assert normalize(text.utils.NormalizeForm.NFKC) == expect_normlize_data[1] 271 assert normalize(text.utils.NormalizeForm.NFD) == expect_normlize_data[2] 272 assert normalize(text.utils.NormalizeForm.NFKD) == expect_normlize_data[3]
|
/third_party/mindspore/tests/ut/cpp/dataset/ |
D | tokenizer_op_test.cc | 303 std::unique_ptr<NormalizeUTF8Op> nfc_normalize_op(new NormalizeUTF8Op(NormalizeForm::kNfc)); in TEST_F() 304 std::unique_ptr<NormalizeUTF8Op> nfkc_normalize_op(new NormalizeUTF8Op(NormalizeForm::kNfkc)); in TEST_F() 305 std::unique_ptr<NormalizeUTF8Op> nfd_normalize_op(new NormalizeUTF8Op(NormalizeForm::kNfd)); in TEST_F() 306 std::unique_ptr<NormalizeUTF8Op> nfkd_normalize_op(new NormalizeUTF8Op(NormalizeForm::kNfkd)); in TEST_F() 355 …std::unique_ptr<BasicTokenizerOp> basic_tokenizer(new BasicTokenizerOp(true, true, NormalizeForm::… in TEST_F()
|
D | c_api_text_test.cc | 164 std::make_shared<text::BasicTokenizer>(true, false, NormalizeForm::kNone, true, true); in TEST_F() 371 …std::make_shared<text::BertTokenizer>(vocab, "##", 100, "[UNK]", false, false, NormalizeForm::kNfc… in TEST_F() 553 …std::make_shared<text::BertTokenizer>(vocab, "##", 100, "", false, true, NormalizeForm::kNone, fal… in TEST_F() 613 …std::make_shared<text::BertTokenizer>(vocab, "##", 100, "[UNK]", true, false, NormalizeForm::kNone… in TEST_F() 2241 …d_ptr<TensorTransform> normalizeutf8 = std::make_shared<text::NormalizeUTF8>(NormalizeForm::kNfkc); in TEST_F() 2287 …ed_ptr<TensorTransform> normalizeutf8 = std::make_shared<text::NormalizeUTF8>(NormalizeForm::kNfc); in TEST_F() 2333 …ed_ptr<TensorTransform> normalizeutf8 = std::make_shared<text::NormalizeUTF8>(NormalizeForm::kNfd); in TEST_F() 2379 …d_ptr<TensorTransform> normalizeutf8 = std::make_shared<text::NormalizeUTF8>(NormalizeForm::kNfkd); in TEST_F()
|
D | execute_test.cc | 460 std::make_shared<text::BasicTokenizer>(false, false, NormalizeForm::kNone, false, true); in TEST_F()
|