• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2021-2023 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "minddata/dataset/engine/ir/datasetops/source/speech_commands_node.h"
17 
18 #include "minddata/dataset/engine/datasetops/source/speech_commands_op.h"
19 
20 namespace mindspore {
21 namespace dataset {
SpeechCommandsNode(const std::string & dataset_dir,const std::string & usage,std::shared_ptr<SamplerObj> sampler,std::shared_ptr<DatasetCache> cache)22 SpeechCommandsNode::SpeechCommandsNode(const std::string &dataset_dir, const std::string &usage,
23                                        std::shared_ptr<SamplerObj> sampler, std::shared_ptr<DatasetCache> cache)
24     : MappableSourceNode(std::move(cache)), dataset_dir_(dataset_dir), usage_(usage), sampler_(sampler) {}
25 
Copy()26 std::shared_ptr<DatasetNode> SpeechCommandsNode::Copy() {
27   std::shared_ptr<SamplerObj> sampler = (sampler_ == nullptr) ? nullptr : sampler_->SamplerCopy();
28   auto node = std::make_shared<SpeechCommandsNode>(dataset_dir_, usage_, sampler, cache_);
29   (void)node->SetNumWorkers(num_workers_);
30   (void)node->SetConnectorQueueSize(connector_que_size_);
31   return node;
32 }
33 
Print(std::ostream & out) const34 void SpeechCommandsNode::Print(std::ostream &out) const {
35   out << (Name() + "(cache: " + ((cache_ != nullptr) ? "true" : "false") + ")");
36 }
37 
ValidateParams()38 Status SpeechCommandsNode::ValidateParams() {
39   RETURN_IF_NOT_OK(DatasetNode::ValidateParams());
40   RETURN_IF_NOT_OK(ValidateDatasetDirParam("SpeechCommandsNode", dataset_dir_));
41   RETURN_IF_NOT_OK(ValidateDatasetSampler("SpeechCommandsNode", sampler_));
42   RETURN_IF_NOT_OK(ValidateStringValue("SpeechCommandsNode", usage_, {"train", "valid", "test", "all"}));
43   return Status::OK();
44 }
45 
Build(std::vector<std::shared_ptr<DatasetOp>> * const node_ops)46 Status SpeechCommandsNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) {
47   auto schema = std::make_unique<DataSchema>();
48   RETURN_IF_NOT_OK(
49     schema->AddColumn(ColDescriptor("waveform", DataType(DataType::DE_FLOAT32), TensorImpl::kFlexible, 1)));
50   TensorShape sample_rate_scalar = TensorShape::CreateScalar();
51   TensorShape label_scalar = TensorShape::CreateScalar();
52   TensorShape speaker_id_scalar = TensorShape::CreateScalar();
53   TensorShape utterance_number_scalar = TensorShape::CreateScalar();
54   RETURN_IF_NOT_OK(schema->AddColumn(
55     ColDescriptor("sample_rate", DataType(DataType::DE_INT32), TensorImpl::kFlexible, 0, &sample_rate_scalar)));
56   RETURN_IF_NOT_OK(
57     schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_STRING), TensorImpl::kFlexible, 0, &label_scalar)));
58   RETURN_IF_NOT_OK(schema->AddColumn(
59     ColDescriptor("speaker_id", DataType(DataType::DE_STRING), TensorImpl::kFlexible, 0, &speaker_id_scalar)));
60   RETURN_IF_NOT_OK(schema->AddColumn(ColDescriptor("utterance_number", DataType(DataType::DE_INT32),
61                                                    TensorImpl::kFlexible, 0, &utterance_number_scalar)));
62   std::shared_ptr<SamplerRT> sampler_rt = nullptr;
63   RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
64 
65   auto speech_commands_op = std::make_shared<SpeechCommandsOp>(dataset_dir_, usage_, num_workers_, connector_que_size_,
66                                                                std::move(schema), std::move(sampler_rt));
67   speech_commands_op->SetTotalRepeats(GetTotalRepeats());
68   speech_commands_op->SetNumRepeatsPerEpoch(GetNumRepeatsPerEpoch());
69   node_ops->push_back(speech_commands_op);
70   return Status::OK();
71 }
72 
GetShardId(int32_t * shard_id)73 Status SpeechCommandsNode::GetShardId(int32_t *shard_id) {
74   *shard_id = sampler_->ShardId();
75   return Status::OK();
76 }
77 
GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> & size_getter,bool estimate,int64_t * dataset_size)78 Status SpeechCommandsNode::GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &size_getter, bool estimate,
79                                           int64_t *dataset_size) {
80   if (dataset_size_ > 0) {
81     *dataset_size = dataset_size_;
82     return Status::OK();
83   }
84   int64_t sample_size, num_rows;
85   std::vector<std::shared_ptr<DatasetOp>> ops;
86   RETURN_IF_NOT_OK(Build(&ops));
87   CHECK_FAIL_RETURN_UNEXPECTED(!ops.empty(), "Unable to build SpeechCommandsOp.");
88   auto op = std::dynamic_pointer_cast<SpeechCommandsOp>(ops.front());
89   RETURN_IF_NOT_OK(op->CountTotalRows(&num_rows));
90   std::shared_ptr<SamplerRT> sampler_rt = nullptr;
91   RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
92   sample_size = sampler_rt->CalculateNumSamples(num_rows);
93   if (sample_size == -1) {
94     RETURN_IF_NOT_OK(size_getter->DryRun(shared_from_this(), &sample_size));
95   }
96   *dataset_size = sample_size;
97   dataset_size_ = *dataset_size;
98   return Status::OK();
99 }
100 
to_json(nlohmann::json * out_json)101 Status SpeechCommandsNode::to_json(nlohmann::json *out_json) {
102   nlohmann::json args, sampler_args;
103   RETURN_IF_NOT_OK(sampler_->to_json(&sampler_args));
104   args["sampler"] = sampler_args;
105   args["usage"] = usage_;
106   args["num_parallel_workers"] = num_workers_;
107   args["connector_queue_size"] = connector_que_size_;
108   args["dataset_dir"] = dataset_dir_;
109   if (cache_ != nullptr) {
110     nlohmann::json cache_args;
111     RETURN_IF_NOT_OK(cache_->to_json(&cache_args));
112     args["cache"] = cache_args;
113   }
114   *out_json = args;
115   return Status::OK();
116 }
117 }  // namespace dataset
118 }  // namespace mindspore
119