Home
last modified time | relevance | path

Searched refs:NormalizeForm (Results 1 – 20 of 20) sorted by relevance

/third_party/mindspore/mindspore/ccsrc/minddata/dataset/text/kernels/
Dnormalize_utf8_op.cc26 const NormalizeForm NormalizeUTF8Op::kDefNormalizeForm = NormalizeForm::kNfkc;
34 case NormalizeForm::kNone: { in Compute()
38 case NormalizeForm::kNfc: { in Compute()
43 case NormalizeForm::kNfkc: { in Compute()
48 case NormalizeForm::kNfd: { in Compute()
53 case NormalizeForm::kNfkd: { in Compute()
Dnormalize_utf8_op.h30 static const NormalizeForm kDefNormalizeForm;
31 …explicit NormalizeUTF8Op(NormalizeForm normalize_form = kDefNormalizeForm) : normalize_form_(norma… in normalize_form_()
40 NormalizeForm normalize_form_;
Dbasic_tokenizer_op.h38 static const NormalizeForm kDefNormalizationForm;
42 const NormalizeForm &normalization_form = kDefNormalizationForm,
63 NormalizeForm normalization_form_;
Dbasic_tokenizer_op.cc32 const NormalizeForm BasicTokenizerOp::kDefNormalizationForm = NormalizeForm::kNone;
52 … const NormalizeForm &normalization_form, const bool &preserve_unused_token, in BasicTokenizerOp()
60 nfd_normalize_(std::make_unique<NormalizeUTF8Op>(NormalizeForm::kNfd)), in BasicTokenizerOp()
Dbert_tokenizer_op.h39 … const NormalizeForm &normalization_form = BasicTokenizerOp::kDefNormalizationForm,
/third_party/mindspore/mindspore/dataset/text/
Dtransforms.py50 from .utils import JiebaMode, NormalizeForm, to_str, SPieceTokenizerOutType, SPieceTokenizerLoadType
559 NormalizeForm.NONE: cde.NormalizeForm.DE_NORMALIZE_NONE,
560 NormalizeForm.NFC: cde.NormalizeForm.DE_NORMALIZE_NFC,
561 NormalizeForm.NFKC: cde.NormalizeForm.DE_NORMALIZE_NFKC,
562 NormalizeForm.NFD: cde.NormalizeForm.DE_NORMALIZE_NFD,
563 NormalizeForm.NFKD: cde.NormalizeForm.DE_NORMALIZE_NFKD
620 … def __init__(self, lower_case=False, keep_whitespace=False, normalization_form=NormalizeForm.NONE,
622 if not isinstance(normalization_form, NormalizeForm):
692 lower_case=False, keep_whitespace=False, normalization_form=NormalizeForm.NONE,
694 if not isinstance(normalization_form, NormalizeForm):
[all …]
D__init__.py30 from .utils import to_str, to_bytes, JiebaMode, Vocab, NormalizeForm, SentencePieceVocab, SentenceP…
Dutils.py314 class NormalizeForm(IntEnum): class
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/text/ir/kernels/
Dtext_ir.cc60 … const NormalizeForm normalize_form, bool preserve_unused_token, in BasicTokenizerOperation()
69 if (normalize_form_ != NormalizeForm::kNone && normalize_form_ != NormalizeForm::kNfc && in ValidateParams()
70 normalize_form_ != NormalizeForm::kNfkc && normalize_form_ != NormalizeForm::kNfd && in ValidateParams()
71 normalize_form_ != NormalizeForm::kNfkd) { in ValidateParams()
89 … const NormalizeForm normalize_form, bool preserve_unused_token, in BertTokenizerOperation()
110 if (normalize_form_ != NormalizeForm::kNone && normalize_form_ != NormalizeForm::kNfc && in ValidateParams()
111 normalize_form_ != NormalizeForm::kNfkc && normalize_form_ != NormalizeForm::kNfd && in ValidateParams()
112 normalize_form_ != NormalizeForm::kNfkd) { in ValidateParams()
288 NormalizeUTF8Operation::NormalizeUTF8Operation(NormalizeForm normalize_form) : normalize_form_(norm… in NormalizeUTF8Operation()
291 if (normalize_form_ != NormalizeForm::kNone && normalize_form_ != NormalizeForm::kNfc && in ValidateParams()
[all …]
Dtext_ir.h59 BasicTokenizerOperation(bool lower_case, bool keep_whitespace, const NormalizeForm normalize_form,
73 NormalizeForm normalize_form_;
82 … bool keep_whitespace, const NormalizeForm normalize_form, bool preserve_unused_token,
100 NormalizeForm normalize_form_;
187 explicit NormalizeUTF8Operation(NormalizeForm normalize_form);
198 NormalizeForm normalize_form_;
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/text/kernels/ir/
Dbindings.cc32 .def(py::init([](bool lower_case, bool keep_whitespace, const NormalizeForm normalize_form, in __anon5546ea160102()
47 … bool keep_whitespace, const NormalizeForm normalize_form, bool preserve_unused_token, in __anon5546ea160302()
71 .def(py::init([](NormalizeForm normalize_form) { in __anon5546ea160702()
125 PYBIND_REGISTER(NormalizeForm, 0, ([](const py::module *m) { in __anon5546ea161102()
126 (void)py::enum_<NormalizeForm>(*m, "NormalizeForm", py::arithmetic()) in __anon5546ea161102()
127 .value("DE_NORMALIZE_NONE", NormalizeForm::kNone) in __anon5546ea161102()
128 .value("DE_NORMALIZE_NFC", NormalizeForm::kNfc) in __anon5546ea161102()
129 .value("DE_NORMALIZE_NFKC", NormalizeForm::kNfkc) in __anon5546ea161102()
130 .value("DE_NORMALIZE_NFD", NormalizeForm::kNfd) in __anon5546ea161102()
131 .value("DE_NORMALIZE_NFKD", NormalizeForm::kNfkd) in __anon5546ea161102()
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/include/dataset/
Dtext.h57 … const NormalizeForm normalize_form = NormalizeForm::kNone, bool preserve_unused_token = true,
96 … const NormalizeForm normalize_form = NormalizeForm::kNone, bool preserve_unused_token = true,
118 … bool keep_whitespace, const NormalizeForm normalize_form, bool preserve_unused_token,
328 explicit NormalizeUTF8(NormalizeForm normalize_form = NormalizeForm::kNfkc);
Dconstants.h159 enum class NormalizeForm { enum
/third_party/mindspore/mindspore/ccsrc/minddata/dataset/api/
Dtext.cc45 …Data(bool lower_case, bool keep_whitespace, const NormalizeForm normalize_form, bool preserve_unus… in Data()
54 NormalizeForm normalize_form_;
59 BasicTokenizer::BasicTokenizer(bool lower_case, bool keep_whitespace, const NormalizeForm normalize… in BasicTokenizer()
72 const NormalizeForm normalize_form, bool preserve_unused_token, bool with_offsets) in Data()
88 NormalizeForm normalize_form_;
95 … bool keep_whitespace, const NormalizeForm normalize_form, bool preserve_unused_token, in BertTokenizer()
267 explicit Data(NormalizeForm normalize_form) : normalize_form_(normalize_form) {} in Data()
268 NormalizeForm normalize_form_;
271 NormalizeUTF8::NormalizeUTF8(NormalizeForm normalize_form) : data_(std::make_shared<Data>(normalize… in NormalizeUTF8()
/third_party/mindspore/tests/ut/python/dataset/
Dtest_text_bert_tokenizer.py81 normalization_form=text.utils.NormalizeForm.NFKC,
175 normalization_form=text.utils.NormalizeForm.NONE,
204 normalization_form=text.utils.NormalizeForm.NONE,
Dtest_text_basic_tokenizer.py71 … normalization_form=text.utils.NormalizeForm.NONE, preserve_unused_token=False):
95 … normalization_form=text.utils.NormalizeForm.NONE, preserve_unused_token=False):
Dtest_text_tokenizer.py269 assert normalize(text.utils.NormalizeForm.NFC) == expect_normlize_data[0]
270 assert normalize(text.utils.NormalizeForm.NFKC) == expect_normlize_data[1]
271 assert normalize(text.utils.NormalizeForm.NFD) == expect_normlize_data[2]
272 assert normalize(text.utils.NormalizeForm.NFKD) == expect_normlize_data[3]
/third_party/mindspore/tests/ut/cpp/dataset/
Dtokenizer_op_test.cc303 std::unique_ptr<NormalizeUTF8Op> nfc_normalize_op(new NormalizeUTF8Op(NormalizeForm::kNfc)); in TEST_F()
304 std::unique_ptr<NormalizeUTF8Op> nfkc_normalize_op(new NormalizeUTF8Op(NormalizeForm::kNfkc)); in TEST_F()
305 std::unique_ptr<NormalizeUTF8Op> nfd_normalize_op(new NormalizeUTF8Op(NormalizeForm::kNfd)); in TEST_F()
306 std::unique_ptr<NormalizeUTF8Op> nfkd_normalize_op(new NormalizeUTF8Op(NormalizeForm::kNfkd)); in TEST_F()
355 …std::unique_ptr<BasicTokenizerOp> basic_tokenizer(new BasicTokenizerOp(true, true, NormalizeForm::… in TEST_F()
Dc_api_text_test.cc164 std::make_shared<text::BasicTokenizer>(true, false, NormalizeForm::kNone, true, true); in TEST_F()
371 …std::make_shared<text::BertTokenizer>(vocab, "##", 100, "[UNK]", false, false, NormalizeForm::kNfc… in TEST_F()
553 …std::make_shared<text::BertTokenizer>(vocab, "##", 100, "", false, true, NormalizeForm::kNone, fal… in TEST_F()
613 …std::make_shared<text::BertTokenizer>(vocab, "##", 100, "[UNK]", true, false, NormalizeForm::kNone… in TEST_F()
2241 …d_ptr<TensorTransform> normalizeutf8 = std::make_shared<text::NormalizeUTF8>(NormalizeForm::kNfkc); in TEST_F()
2287 …ed_ptr<TensorTransform> normalizeutf8 = std::make_shared<text::NormalizeUTF8>(NormalizeForm::kNfc); in TEST_F()
2333 …ed_ptr<TensorTransform> normalizeutf8 = std::make_shared<text::NormalizeUTF8>(NormalizeForm::kNfd); in TEST_F()
2379 …d_ptr<TensorTransform> normalizeutf8 = std::make_shared<text::NormalizeUTF8>(NormalizeForm::kNfkd); in TEST_F()
Dexecute_test.cc460 std::make_shared<text::BasicTokenizer>(false, false, NormalizeForm::kNone, false, true); in TEST_F()