1 /**
2 * Copyright 2019-2021 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include <fstream>
17 #include <iostream>
18 #include <memory>
19 #include <string>
20
21 #include "common/common.h"
22 #include "utils/ms_utils.h"
23 #include "minddata/dataset/core/client.h"
24 #include "minddata/dataset/core/global_context.h"
25 #include "minddata/dataset/engine/datasetops/source/manifest_op.h"
26 #include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h"
27 #include "minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h"
28 #include "minddata/dataset/util/status.h"
29 #include "gtest/gtest.h"
30 #include "utils/log_adapter.h"
31 #include "securec.h"
32
33 namespace common = mindspore::common;
34
35 using namespace mindspore::dataset;
36 using mindspore::LogStream;
37 using mindspore::ExceptionType::NoExceptionType;
38 using mindspore::MsLogLevel::ERROR;
39
Manifest(int32_t num_works,int32_t rows,int32_t conns,const std::string & file,std::string usage="train",std::shared_ptr<SamplerRT> sampler=nullptr,std::map<std::string,int32_t> map={},bool decode=false)40 std::shared_ptr<ManifestOp> Manifest(int32_t num_works, int32_t rows, int32_t conns, const std::string &file,
41 std::string usage = "train", std::shared_ptr<SamplerRT> sampler = nullptr,
42 std::map<std::string, int32_t> map = {}, bool decode = false) {
43 if (sampler == nullptr) {
44 const int64_t num_samples = 0;
45 const int64_t start_index = 0;
46 sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples);
47 }
48 auto schema = std::make_unique<DataSchema>();
49 schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1));
50 schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1));
51 return std::make_shared<ManifestOp>(num_works, file, conns, decode, map, std::move(schema), std::move(sampler),
52 usage);
53 }
54
55 class MindDataTestManifest : public UT::DatasetOpTesting {
56 protected:
57 };
58
// Runs a ManifestOp (default sequential sampler) under Repeat(2) and checks that the rows come back with
// labels 0, 1, 0, 1 and that exactly four rows are produced.
TEST_F(MindDataTestManifest, TestSequentialManifestWithRepeat) {
  const std::string file = datasets_root_path_ + "/testManifestData/cpp.json";
  auto op1 = Manifest(16, 2, 32, file);
  auto op2 = Repeat(2);
  op1->SetTotalRepeats(2);
  op1->SetNumRepeatsPerEpoch(2);
  auto tree = Build({op1, op2});
  // A failed Prepare must stop the test rather than be silently dropped.
  ASSERT_OK(tree->Prepare());

  // Labels are read as int32_t below, so the expectations use the same type (no signed/unsigned comparison).
  const int32_t expected[] = {0, 1, 0, 1};
  const uint64_t expected_rows = sizeof(expected) / sizeof(expected[0]);

  Status rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << ".";
  }
  ASSERT_TRUE(rc.IsOk());

  DatasetIterator di(tree);
  TensorMap tensor_map;
  ASSERT_OK(di.GetNextAsMap(&tensor_map));
  uint64_t i = 0;
  int32_t label = 0;
  // An empty map marks the end of the data.
  while (tensor_map.size() != 0) {
    // An unexpected extra row must fail the test, not read past the end of `expected`.
    ASSERT_LT(i, expected_rows);
    ASSERT_OK(tensor_map["label"]->GetItemAt<int32_t>(&label, {}));
    EXPECT_EQ(expected[i], label);
    MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n";
    i++;
    ASSERT_OK(di.GetNextAsMap(&tensor_map));
  }
  EXPECT_EQ(i, expected_rows);
}
89
// Reads the manifest through a SubsetRandomSamplerRT restricted to index 1 and checks that exactly one row is
// produced and that its label is 1.
TEST_F(MindDataTestManifest, TestSubsetRandomSamplerManifest) {
  std::vector<int64_t> indices({1});
  int64_t num_samples = 0;
  std::shared_ptr<SamplerRT> sampler = std::make_shared<SubsetRandomSamplerRT>(indices, num_samples);
  const std::string file = datasets_root_path_ + "/testManifestData/cpp.json";
  // Only index 1 is sampled; the assertions below expect a single row whose label is 1.
  auto tree = Build({Manifest(16, 2, 32, file, "train", std::move(sampler))});
  // A failed Prepare must stop the test rather than be silently dropped.
  ASSERT_OK(tree->Prepare());

  Status rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << ".";
  }
  ASSERT_TRUE(rc.IsOk());

  DatasetIterator di(tree);
  TensorMap tensor_map;
  // Use a fatal check, like the other tests in this file: continuing after a failed fetch is meaningless.
  ASSERT_OK(di.GetNextAsMap(&tensor_map));
  uint64_t i = 0;
  int32_t label = 0;
  // An empty map marks the end of the data.
  while (tensor_map.size() != 0) {
    ASSERT_OK(tensor_map["label"]->GetItemAt<int32_t>(&label, {}));
    // Verify the row that was just read before fetching the next one.
    EXPECT_EQ(label, 1);
    i++;
    ASSERT_OK(di.GetNextAsMap(&tensor_map));
  }
  EXPECT_EQ(i, 1U);
}
118
// Passes a class-name -> index map to the ManifestOp and checks that the two rows produced carry the mapped
// labels 111 and 222, in that order.
TEST_F(MindDataTestManifest, MindDataTestManifestClassIndex) {
  const std::string file = datasets_root_path_ + "/testManifestData/cpp.json";
  std::map<std::string, int32_t> map;
  map["cat"] = 111;
  map["dog"] = 222;
  // Extra entry; its value is not expected in the output (the assertions below allow only two rows: 111, 222).
  map["wrong folder name"] = 1234;
  auto tree = Build({Manifest(16, 2, 32, file, "train", nullptr, map)});

  // Labels are read as int32_t below, so the expectations use the same type (no signed/unsigned comparison).
  const int32_t expected[] = {111, 222};
  const uint64_t expected_rows = sizeof(expected) / sizeof(expected[0]);

  // A failed Prepare must stop the test rather than be silently dropped.
  ASSERT_OK(tree->Prepare());
  Status rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << ".";
  }
  ASSERT_TRUE(rc.IsOk());

  DatasetIterator di(tree);
  TensorMap tensor_map;
  ASSERT_OK(di.GetNextAsMap(&tensor_map));
  uint64_t i = 0;
  int32_t label = 0;
  // An empty map marks the end of the data.
  while (tensor_map.size() != 0) {
    // An unexpected extra row must fail the test, not read past the end of `expected`.
    ASSERT_LT(i, expected_rows);
    ASSERT_OK(tensor_map["label"]->GetItemAt<int32_t>(&label, {}));
    EXPECT_EQ(label, expected[i]);
    MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n";
    i++;
    ASSERT_OK(di.GetNextAsMap(&tensor_map));
  }
  EXPECT_EQ(i, expected_rows);
}
149
// Limits a sequential sampler to one sample (start index 0), repeats the pipeline four times, and checks that
// four rows are produced, each with label 0.
TEST_F(MindDataTestManifest, MindDataTestManifestNumSamples) {
  const std::string file = datasets_root_path_ + "/testManifestData/cpp.json";
  int64_t num_samples = 1;
  int64_t start_index = 0;
  auto seq_sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples);
  auto op1 = Manifest(16, 2, 32, file, "train", std::move(seq_sampler), {});
  auto op2 = Repeat(4);
  op1->SetTotalRepeats(4);
  op1->SetNumRepeatsPerEpoch(4);
  auto tree = Build({op1, op2});
  // A failed Prepare must stop the test rather than be silently dropped.
  ASSERT_OK(tree->Prepare());

  Status rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << ".";
  }
  ASSERT_TRUE(rc.IsOk());

  DatasetIterator di(tree);
  TensorMap tensor_map;
  ASSERT_OK(di.GetNextAsMap(&tensor_map));
  uint64_t i = 0;
  int32_t label = 0;
  // An empty map marks the end of the data.
  while (tensor_map.size() != 0) {
    ASSERT_OK(tensor_map["label"]->GetItemAt<int32_t>(&label, {}));
    EXPECT_EQ(0, label);
    MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n";
    i++;
    ASSERT_OK(di.GetNextAsMap(&tensor_map));
  }
  // One sample per pass, four passes.
  EXPECT_EQ(i, 4U);
}
182
// Builds the ManifestOp with usage "eval" and a sequential sampler limited to one sample (start index 0), and
// checks that exactly one row with label 0 is produced.
TEST_F(MindDataTestManifest, MindDataTestManifestEval) {
  const std::string file = datasets_root_path_ + "/testManifestData/cpp.json";
  int64_t num_samples = 1;
  int64_t start_index = 0;
  auto seq_sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples);
  auto tree = Build({Manifest(16, 2, 32, file, "eval", std::move(seq_sampler), {})});
  // A failed Prepare must stop the test rather than be silently dropped.
  ASSERT_OK(tree->Prepare());

  Status rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << ".";
  }
  ASSERT_TRUE(rc.IsOk());

  DatasetIterator di(tree);
  TensorMap tensor_map;
  ASSERT_OK(di.GetNextAsMap(&tensor_map));
  uint64_t i = 0;
  int32_t label = 0;
  // An empty map marks the end of the data.
  while (tensor_map.size() != 0) {
    ASSERT_OK(tensor_map["label"]->GetItemAt<int32_t>(&label, {}));
    EXPECT_EQ(0, label);
    MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n";
    i++;
    ASSERT_OK(di.GetNextAsMap(&tensor_map));
  }
  EXPECT_EQ(i, 1U);
}
211