• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2021-2023 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "minddata/dataset/engine/ir/datasetops/source/stl10_node.h"
18 
19 #include "minddata/dataset/engine/datasetops/source/stl10_op.h"
20 #include "minddata/dataset/util/status.h"
21 
22 namespace mindspore {
23 namespace dataset {
STL10Node(const std::string & dataset_dir,const std::string & usage,std::shared_ptr<SamplerObj> sampler,std::shared_ptr<DatasetCache> cache)24 STL10Node::STL10Node(const std::string &dataset_dir, const std::string &usage, std::shared_ptr<SamplerObj> sampler,
25                      std::shared_ptr<DatasetCache> cache)
26     : MappableSourceNode(std::move(cache)), dataset_dir_(dataset_dir), usage_(usage), sampler_(sampler) {}
27 
Copy()28 std::shared_ptr<DatasetNode> STL10Node::Copy() {
29   std::shared_ptr<SamplerObj> sampler = (sampler_ == nullptr) ? nullptr : sampler_->SamplerCopy();
30   auto node = std::make_shared<STL10Node>(dataset_dir_, usage_, sampler, cache_);
31   (void)node->SetNumWorkers(num_workers_);
32   (void)node->SetConnectorQueueSize(connector_que_size_);
33   return node;
34 }
35 
Print(std::ostream & out) const36 void STL10Node::Print(std::ostream &out) const {
37   out << (Name() + "(cache:" + ((cache_ != nullptr) ? "true" : "false") + ")");
38 }
39 
ValidateParams()40 Status STL10Node::ValidateParams() {
41   RETURN_IF_NOT_OK(DatasetNode::ValidateParams());
42   RETURN_IF_NOT_OK(ValidateDatasetDirParam("STL10Node", dataset_dir_));
43 
44   RETURN_IF_NOT_OK(ValidateDatasetSampler("STL10Node", sampler_));
45 
46   RETURN_IF_NOT_OK(ValidateStringValue("STL10Node", usage_, {"train", "test", "unlabeled", "train+unlabeled", "all"}));
47 
48   return Status::OK();
49 }
50 
51 // Function to build STL10Op for STL10
Build(std::vector<std::shared_ptr<DatasetOp>> * const node_ops)52 Status STL10Node::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) {
53   // Do internal Schema generation.
54   auto schema = std::make_unique<DataSchema>();
55   RETURN_IF_NOT_OK(schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kCv, 1)));
56   TensorShape scalar = TensorShape::CreateScalar();
57   RETURN_IF_NOT_OK(
58     schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_INT32), TensorImpl::kFlexible, 0, &scalar)));
59   std::shared_ptr<SamplerRT> sampler_rt = nullptr;
60   RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
61 
62   auto stl10_op = std::make_shared<STL10Op>(usage_, num_workers_, dataset_dir_, connector_que_size_, std::move(schema),
63                                             std::move(sampler_rt));
64   stl10_op->SetTotalRepeats(GetTotalRepeats());
65   stl10_op->SetNumRepeatsPerEpoch(GetNumRepeatsPerEpoch());
66   node_ops->push_back(stl10_op);
67 
68   return Status::OK();
69 }
70 
71 // Get the shard id of node
GetShardId(int32_t * shard_id)72 Status STL10Node::GetShardId(int32_t *shard_id) {
73   *shard_id = sampler_->ShardId();
74 
75   return Status::OK();
76 }
77 
78 // Get Dataset size
GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> & size_getter,bool estimate,int64_t * dataset_size)79 Status STL10Node::GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &size_getter, bool estimate,
80                                  int64_t *dataset_size) {
81   if (dataset_size_ > 0) {
82     *dataset_size = dataset_size_;
83     return Status::OK();
84   }
85 
86   int64_t num_rows, sample_size;
87   RETURN_IF_NOT_OK(STL10Op::CountTotalRows(dataset_dir_, usage_, &num_rows));
88   std::shared_ptr<SamplerRT> sampler_rt = nullptr;
89   RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
90 
91   sample_size = sampler_rt->CalculateNumSamples(num_rows);
92   if (sample_size == -1) {
93     RETURN_IF_NOT_OK(size_getter->DryRun(shared_from_this(), &sample_size));
94   }
95 
96   *dataset_size = sample_size;
97   dataset_size_ = *dataset_size;
98 
99   return Status::OK();
100 }
101 
to_json(nlohmann::json * out_json)102 Status STL10Node::to_json(nlohmann::json *out_json) {
103   nlohmann::json args, sampler_args;
104   RETURN_IF_NOT_OK(sampler_->to_json(&sampler_args));
105   args["sampler"] = sampler_args;
106   args["num_parallel_workers"] = num_workers_;
107   args["connector_queue_size"] = connector_que_size_;
108   args["dataset_dir"] = dataset_dir_;
109   args["usage"] = usage_;
110   if (cache_ != nullptr) {
111     nlohmann::json cache_args;
112     RETURN_IF_NOT_OK(cache_->to_json(&cache_args));
113     args["cache"] = cache_args;
114   }
115   *out_json = args;
116   return Status::OK();
117 }
118 }  // namespace dataset
119 }  // namespace mindspore
120