1 /**
2 * Copyright 2019-2021 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include <iostream>
17 #include <memory>
18 #include <string>
19 #include "common/common.h"
20 #include "utils/ms_utils.h"
21 #include "minddata/dataset/core/client.h"
22 #include "minddata/dataset/core/global_context.h"
23 #include "minddata/dataset/engine/datasetops/source/image_folder_op.h"
24 #include "minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.h"
25 #include "minddata/dataset/engine/datasetops/source/sampler/pk_sampler.h"
26 #include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h"
27 #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
28 #include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h"
29 #include "minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h"
30 #include "minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h"
31 #include "minddata/dataset/util/status.h"
32 #include "gtest/gtest.h"
33 #include "utils/log_adapter.h"
34 #include "securec.h"
35
36 namespace common = mindspore::common;
37
38 using namespace mindspore::dataset;
39 using mindspore::LogStream;
40 using mindspore::ExceptionType::NoExceptionType;
41 using mindspore::MsLogLevel::ERROR;
42
43 // std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false);
44
45 // std::shared_ptr<RepeatOp> Repeat(int repeat_cnt);
46
47 // std::shared_ptr<ExecutionTree> Build(std::vector<std::shared_ptr<DatasetOp>> ops);
48
ImageFolder(int64_t num_works,int64_t rows,int64_t conns,std::string path,bool shuf=false,std::shared_ptr<SamplerRT> sampler=nullptr,std::map<std::string,int32_t> map={},bool decode=false)49 std::shared_ptr<ImageFolderOp> ImageFolder(int64_t num_works, int64_t rows, int64_t conns, std::string path,
50 bool shuf = false, std::shared_ptr<SamplerRT> sampler = nullptr,
51 std::map<std::string, int32_t> map = {}, bool decode = false) {
52 std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
53 TensorShape scalar = TensorShape::CreateScalar();
54 (void)schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1));
55 (void)schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_INT32), TensorImpl::kFlexible, 0, &scalar));
56 std::set<std::string> ext = {".jpg", ".JPEG"};
57 if (sampler == nullptr) {
58 int64_t num_samples = 0; // default num samples of 0 means to sample entire set of data
59 int64_t start_index = 0;
60 sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples);
61 }
62 std::shared_ptr<ImageFolderOp> so =
63 std::make_shared<ImageFolderOp>(num_works, path, conns, false, decode, ext, map, std::move(schema), sampler);
64 return so;
65 }
66
Create1DTensor(std::shared_ptr<Tensor> * sample_ids,int64_t num_elements,unsigned char * data=nullptr,DataType::Type data_type=DataType::DE_UINT32)67 Status Create1DTensor(std::shared_ptr<Tensor> *sample_ids, int64_t num_elements, unsigned char *data = nullptr,
68 DataType::Type data_type = DataType::DE_UINT32) {
69 TensorShape shape(std::vector<int64_t>(1, num_elements));
70 RETURN_IF_NOT_OK(Tensor::CreateFromMemory(shape, DataType(data_type), data, sample_ids));
71
72 return Status::OK();
73 }
74
75 class MindDataTestImageFolderSampler : public UT::DatasetOpTesting {
76 protected:
77 };
78
/// Default (sequential) sampler followed by Repeat(2): labels must arrive in class order within
/// each pass and 88 rows must be produced in total.
TEST_F(MindDataTestImageFolderSampler, TestSequentialImageFolderWithRepeat) {
  std::string folder_path = datasets_root_path_ + "/testPK/data";
  auto op1 = ImageFolder(16, 2, 32, folder_path, false);
  auto op2 = Repeat(2);
  op1->SetTotalRepeats(2);
  op1->SetNumRepeatsPerEpoch(2);
  auto tree = Build({op1, op2});
  // Stop right away if the tree cannot be prepared instead of dropping the status.
  ASSERT_OK(tree->Prepare());
  int32_t res[] = {0, 1, 2, 3};
  Status rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << ".";
    EXPECT_TRUE(false);
  } else {
    DatasetIterator di(tree);
    TensorMap tensor_map;
    ASSERT_OK(di.GetNextAsMap(&tensor_map));
    uint64_t i = 0;
    int32_t label = 0;
    while (tensor_map.size() != 0) {
      // A failed read would otherwise leave `label` holding the previous row's value.
      ASSERT_OK(tensor_map["label"]->GetItemAt<int32_t>(&label, {}));
      // The expectation treats each pass as 44 rows made of runs of 11 rows per label.
      EXPECT_TRUE(res[(i % 44) / 11] == label);
      MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n";
      i++;
      ASSERT_OK(di.GetNextAsMap(&tensor_map));
    }
    EXPECT_EQ(i, 88U);
  }
}
109
/// Reads the whole folder once and checks that 44 rows are produced.
// NOTE(review): the ImageFolder helper in this file never reads its `shuf` argument, so passing
// `true` here does not shuffle; the pipeline runs with the helper's default sequential sampler.
TEST_F(MindDataTestImageFolderSampler, TestRandomImageFolder) {
  std::string folder_path = datasets_root_path_ + "/testPK/data";
  auto tree = Build({ImageFolder(16, 2, 32, folder_path, true, nullptr)});
  // Stop right away if the tree cannot be prepared instead of dropping the status.
  ASSERT_OK(tree->Prepare());
  Status rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << ".";
    EXPECT_TRUE(false);
  } else {
    DatasetIterator di(tree);
    TensorMap tensor_map;
    ASSERT_OK(di.GetNextAsMap(&tensor_map));
    uint64_t i = 0;
    int32_t label = 0;
    while (tensor_map.size() != 0) {
      // The label is only logged, but a failing read should still fail the test.
      ASSERT_OK(tensor_map["label"]->GetItemAt<int32_t>(&label, {}));
      MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n";
      i++;
      ASSERT_OK(di.GetNextAsMap(&tensor_map));
    }
    EXPECT_EQ(i, 44U);
  }
}
134
/// RandomSamplerRT with the global seed pinned to 0: the 12 sampled labels must match the recorded
/// ground truth.
TEST_F(MindDataTestImageFolderSampler, TestRandomSamplerImageFolder) {
  // Restores the global seed when the test body exits by any path. The ASSERT_* macros below
  // return early on failure, which previously skipped the trailing set_seed() call and leaked
  // seed 0 into later tests.
  struct SeedGuard {
    explicit SeedGuard(int32_t seed) : saved(seed) {}
    ~SeedGuard() { GlobalContext::config_manager()->set_seed(saved); }
    int32_t saved;
  };
  int32_t original_seed = GlobalContext::config_manager()->seed();
  SeedGuard seed_guard(original_seed);
  GlobalContext::config_manager()->set_seed(0);

  int64_t num_samples = 12;
  std::shared_ptr<SamplerRT> sampler = std::make_shared<RandomSamplerRT>(true, num_samples, true);
  int32_t res[] = {2, 2, 2, 3, 2, 3, 2, 3, 1, 2, 2, 1};  // ground truth label
  const uint64_t expected_rows = sizeof(res) / sizeof(res[0]);
  std::string folder_path = datasets_root_path_ + "/testPK/data";
  auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(sampler))});
  ASSERT_OK(tree->Prepare());
  Status rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << ".";
    EXPECT_TRUE(false);
  } else {
    DatasetIterator di(tree);
    TensorMap tensor_map;
    ASSERT_OK(di.GetNextAsMap(&tensor_map));
    uint64_t i = 0;
    int32_t label = 0;
    while (tensor_map.size() != 0) {
      // Guard the lookup: an unexpected extra row must fail the test, not read past `res`.
      ASSERT_LT(i, expected_rows);
      ASSERT_OK(tensor_map["label"]->GetItemAt<int32_t>(&label, {}));
      EXPECT_TRUE(res[i] == label);
      MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n";
      i++;
      ASSERT_OK(di.GetNextAsMap(&tensor_map));
    }
    EXPECT_EQ(i, expected_rows);
  }
}
166
/// Default sampler + Repeat(2) + Batch(11): every batch must carry 11 identical labels, cycling
/// through the rows of `res`, for 8 batches in total.
TEST_F(MindDataTestImageFolderSampler, TestSequentialImageFolderWithRepeatBatch) {
  std::string folder_path = datasets_root_path_ + "/testPK/data";
  auto op1 = ImageFolder(16, 2, 32, folder_path, false);
  auto op2 = Repeat(2);
  auto op3 = Batch(11);
  op1->SetTotalRepeats(2);
  op1->SetNumRepeatsPerEpoch(2);
  auto tree = Build({op1, op2, op3});
  // Stop right away if the tree cannot be prepared instead of dropping the status.
  ASSERT_OK(tree->Prepare());
  int32_t res[4][11] = {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
                        {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
                        {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
                        {3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3}};
  Status rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << ".";
    EXPECT_TRUE(false);
  } else {
    DatasetIterator di(tree);
    TensorMap tensor_map;
    ASSERT_OK(di.GetNextAsMap(&tensor_map));
    uint64_t i = 0;
    while (tensor_map.size() != 0) {
      std::shared_ptr<Tensor> label;
      // `label` is dereferenced just below, so a failed creation must abort the test here.
      ASSERT_OK(Create1DTensor(&label, 11, reinterpret_cast<unsigned char *>(res[i % 4]), DataType::DE_INT32));
      EXPECT_TRUE((*label) == (*tensor_map["label"]));
      MS_LOG(DEBUG) << "row: " << i << " " << tensor_map["image"]->shape() << " (*label):" << (*label)
                    << " *tensor_map[label]: " << *tensor_map["label"] << std::endl;
      i++;
      ASSERT_OK(di.GetNextAsMap(&tensor_map));
    }
    EXPECT_EQ(i, 8U);
  }
}
202
/// SubsetRandomSamplerRT over 12 chosen indices: six rows of label 0 and six of label 1 must be
/// produced, in any order.
TEST_F(MindDataTestImageFolderSampler, TestSubsetRandomSamplerImageFolder) {
  // id range 0 - 10 is label 0, and id range 11 - 21 is label 1
  std::vector<int64_t> indices({0, 1, 2, 3, 4, 5, 12, 13, 14, 15, 16, 11});
  int64_t num_samples = 0;
  std::shared_ptr<SamplerRT> sampler = std::make_shared<SubsetRandomSamplerRT>(indices, num_samples);
  std::string folder_path = datasets_root_path_ + "/testPK/data";
  // Expect 6 samples for label 0 and 1; each entry counts down as rows of that label arrive.
  int res[2] = {6, 6};
  auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(sampler))});
  // Stop right away if the tree cannot be prepared instead of dropping the status.
  ASSERT_OK(tree->Prepare());
  Status rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << ".";
    EXPECT_TRUE(false);
  } else {
    DatasetIterator di(tree);
    TensorMap tensor_map;
    ASSERT_OK(di.GetNextAsMap(&tensor_map));
    uint64_t i = 0;
    int32_t label = 0;
    while (tensor_map.size() != 0) {
      ASSERT_OK(tensor_map["label"]->GetItemAt<int32_t>(&label, {}));
      // `label` indexes the 2-element counter array; reject anything else before writing to it.
      ASSERT_GE(label, 0);
      ASSERT_LT(label, 2);
      res[label]--;
      i++;
      ASSERT_OK(di.GetNextAsMap(&tensor_map));
    }
    EXPECT_EQ(res[0], 0);
    EXPECT_EQ(res[1], 0);
    EXPECT_EQ(i, 12U);
  }
}
235
/// WeightedRandomSamplerRT with replacement: exactly `num_samples` rows must be produced.
TEST_F(MindDataTestImageFolderSampler, TestWeightedRandomSamplerImageFolder) {
  // num samples to draw.
  int64_t num_samples = 12;
  int64_t total_samples = 44;
  int64_t samples_per_tensor = 10;
  // Every entry receives the same weight. A fixed positive constant replaces the former
  // `std::rand() % 100`, which could evaluate to 0 (all weights zero), depended on the process-wide
  // rand state, and never produced differing weights anyway.
  std::vector<double> weights(total_samples, 1.0);

  // create sampler with replacement = true
  std::shared_ptr<SamplerRT> sampler =
    std::make_shared<WeightedRandomSamplerRT>(weights, num_samples, true, samples_per_tensor);

  std::string folder_path = datasets_root_path_ + "/testPK/data";
  auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(sampler))});
  // Stop right away if the tree cannot be prepared instead of dropping the status.
  ASSERT_OK(tree->Prepare());
  Status rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << ".";
    EXPECT_TRUE(false);
  } else {
    DatasetIterator di(tree);
    TensorMap tensor_map;
    ASSERT_OK(di.GetNextAsMap(&tensor_map));
    uint64_t i = 0;
    int32_t label = 0;
    while (tensor_map.size() != 0) {
      // The label value is not checked, but the read itself must succeed.
      ASSERT_OK(tensor_map["label"]->GetItemAt<int32_t>(&label, {}));
      i++;
      ASSERT_OK(di.GetNextAsMap(&tensor_map));
    }
    EXPECT_EQ(i, 12U);
  }
}
269
/// Supplies a class-index map covering two real folders plus one unknown name: 22 rows must be
/// produced, carrying the mapped labels 111 then 333.
TEST_F(MindDataTestImageFolderSampler, TestImageFolderClassIndex) {
  std::string folder_path = datasets_root_path_ + "/testPK/data";
  std::map<std::string, int32_t> map;
  map["class3"] = 333;
  map["class1"] = 111;
  map["wrong folder name"] = 1234;  // this is skipped
  auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, nullptr, map)});
  int64_t res[2] = {111, 333};
  // Stop right away if the tree cannot be prepared instead of dropping the status.
  ASSERT_OK(tree->Prepare());
  Status rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << ".";
    EXPECT_TRUE(false);
  } else {
    DatasetIterator di(tree);
    TensorMap tensor_map;
    ASSERT_OK(di.GetNextAsMap(&tensor_map));
    uint64_t i = 0;
    int32_t label = 0;
    while (tensor_map.size() != 0) {
      // The expectation uses runs of 11 rows per class; more than 22 rows would index past `res`.
      ASSERT_LT(i / 11, 2U);
      ASSERT_OK(tensor_map["label"]->GetItemAt<int32_t>(&label, {}));
      EXPECT_TRUE(label == res[i / 11]);
      MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n";
      i++;
      ASSERT_OK(di.GetNextAsMap(&tensor_map));
    }
    EXPECT_EQ(i, 22U);
  }
}
300
/// DistributedSamplerRT(11, 10, ...) followed by Repeat(4): 16 rows must be produced and the label
/// of row i must equal i % 4.
TEST_F(MindDataTestImageFolderSampler, TestDistributedSampler) {
  int64_t num_samples = 0;
  std::shared_ptr<SamplerRT> sampler = std::make_shared<DistributedSamplerRT>(11, 10, false, num_samples);
  std::string folder_path = datasets_root_path_ + "/testPK/data";
  auto op1 = ImageFolder(16, 2, 32, folder_path, false, std::move(sampler));
  auto op2 = Repeat(4);
  op1->SetTotalRepeats(4);
  op1->SetNumRepeatsPerEpoch(4);
  auto tree = Build({op1, op2});
  // Stop right away if the tree cannot be prepared instead of dropping the status.
  ASSERT_OK(tree->Prepare());
  Status rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << ".";
    EXPECT_TRUE(false);
  } else {
    DatasetIterator di(tree);
    TensorMap tensor_map;
    ASSERT_OK(di.GetNextAsMap(&tensor_map));
    uint64_t i = 0;
    int32_t label = 0;
    while (tensor_map.size() != 0) {
      ASSERT_OK(tensor_map["label"]->GetItemAt<int32_t>(&label, {}));
      // Compare like-typed values; `i % 4` is unsigned 64-bit while `label` is signed 32-bit.
      EXPECT_EQ(static_cast<int32_t>(i % 4), label);
      MS_LOG(DEBUG) << "row:" << i << "\tlabel:" << label << "\n";
      i++;
      ASSERT_OK(di.GetNextAsMap(&tensor_map));
    }
    EXPECT_EQ(i, 16U);
  }
}
332
/// PKSamplerRT(3, false, 0, 4): 12 rows must be produced with three consecutive rows for each of
/// the labels 0 through 3.
TEST_F(MindDataTestImageFolderSampler, TestPKSamplerImageFolder) {
  int64_t num_samples = 0;
  std::shared_ptr<SamplerRT> sampler = std::make_shared<PKSamplerRT>(3, false, num_samples, 4);
  int32_t res[] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3};  // ground truth label
  const uint64_t expected_rows = sizeof(res) / sizeof(res[0]);
  std::string folder_path = datasets_root_path_ + "/testPK/data";
  auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(sampler))});
  // Stop right away if the tree cannot be prepared instead of dropping the status.
  ASSERT_OK(tree->Prepare());
  Status rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << ".";
    EXPECT_TRUE(false);
  } else {
    DatasetIterator di(tree);
    TensorMap tensor_map;
    ASSERT_OK(di.GetNextAsMap(&tensor_map));
    uint64_t i = 0;
    int32_t label = 0;
    while (tensor_map.size() != 0) {
      // Guard the lookup: an unexpected extra row must fail the test, not read past `res`.
      ASSERT_LT(i, expected_rows);
      ASSERT_OK(tensor_map["label"]->GetItemAt<int32_t>(&label, {}));
      EXPECT_TRUE(res[i] == label);
      MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n";
      i++;
      ASSERT_OK(di.GetNextAsMap(&tensor_map));
    }
    EXPECT_EQ(i, expected_rows);
  }
}
361
/// Class-index map plus decode=true with a 20-sample sequential sampler: every image must have
/// shape {2268, 4032, 3} and labels must follow the map (111 then 333).
TEST_F(MindDataTestImageFolderSampler, TestImageFolderDecode) {
  std::string folder_path = datasets_root_path_ + "/testPK/data";
  std::map<std::string, int32_t> map;
  map["class3"] = 333;
  map["class1"] = 111;
  map["wrong folder name"] = 1234;  // this is skipped
  int64_t num_samples = 20;
  int64_t start_index = 0;
  auto seq_sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples);
  auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(seq_sampler), map, true)});
  int64_t res[2] = {111, 333};
  // Stop right away if the tree cannot be prepared instead of dropping the status.
  ASSERT_OK(tree->Prepare());
  Status rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << ".";
    EXPECT_TRUE(false);
  } else {
    DatasetIterator di(tree);
    TensorMap tensor_map;
    ASSERT_OK(di.GetNextAsMap(&tensor_map));
    uint64_t i = 0;
    int32_t label = 0;
    while (tensor_map.size() != 0) {
      // The expectation uses runs of 11 rows per class; guard the index into the 2-element `res`.
      ASSERT_LT(i / 11, 2U);
      ASSERT_OK(tensor_map["label"]->GetItemAt<int32_t>(&label, {}));
      EXPECT_TRUE(label == res[i / 11]);
      EXPECT_TRUE(tensor_map["image"]->shape() ==
                  TensorShape({2268, 4032, 3}));  // verify shapes are correct after decode
      MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n";
      i++;
      ASSERT_OK(di.GetNextAsMap(&tensor_map));
    }
    EXPECT_EQ(i, 20U);
  }
}
397
/// DistributedSamplerRT(4, 0, false, 5): five rows must be produced with labels 0, 0, 0, 1, 1.
TEST_F(MindDataTestImageFolderSampler, TestImageFolderSharding1) {
  int64_t num_samples = 5;
  std::shared_ptr<SamplerRT> sampler = std::make_shared<DistributedSamplerRT>(4, 0, false, num_samples);
  std::string folder_path = datasets_root_path_ + "/testPK/data";
  // ImageFolder args: num_works, rows, conns, path, shuf, sampler, map (decode keeps its default)
  auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(sampler), {})});
  // Stop right away if the tree cannot be prepared instead of dropping the status.
  ASSERT_OK(tree->Prepare());
  Status rc = tree->Launch();
  int32_t labels[5] = {0, 0, 0, 1, 1};
  const uint64_t expected_rows = sizeof(labels) / sizeof(labels[0]);
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << ".";
    EXPECT_TRUE(false);
  } else {
    DatasetIterator di(tree);
    TensorMap tensor_map;
    ASSERT_OK(di.GetNextAsMap(&tensor_map));
    uint64_t i = 0;
    int32_t label = 0;
    while (tensor_map.size() != 0) {
      // Guard the lookup: an unexpected extra row must fail the test, not read past `labels`.
      ASSERT_LT(i, expected_rows);
      ASSERT_OK(tensor_map["label"]->GetItemAt<int32_t>(&label, {}));
      EXPECT_EQ(labels[i], label);
      MS_LOG(DEBUG) << "row:" << i << "\tlabel:" << label << "\n";
      i++;
      ASSERT_OK(di.GetNextAsMap(&tensor_map));
    }
    EXPECT_EQ(i, expected_rows);
  }
}
427
/// DistributedSamplerRT(4, 3, false, 12): although 12 samples are requested, the test expects 11
/// rows, with labels 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3.
TEST_F(MindDataTestImageFolderSampler, TestImageFolderSharding2) {
  int64_t num_samples = 12;
  std::shared_ptr<SamplerRT> sampler = std::make_shared<DistributedSamplerRT>(4, 3, false, num_samples);
  std::string folder_path = datasets_root_path_ + "/testPK/data";
  // ImageFolder args: num_works, rows, conns, path, shuf, sampler, map (decode keeps its default)
  auto tree = Build({ImageFolder(16, 16, 32, folder_path, false, std::move(sampler), {})});
  // Stop right away if the tree cannot be prepared instead of dropping the status.
  ASSERT_OK(tree->Prepare());
  Status rc = tree->Launch();
  // Signed, to match the int32_t label read from the tensor (avoids a signed/unsigned comparison).
  int32_t labels[11] = {0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3};
  const uint64_t expected_rows = sizeof(labels) / sizeof(labels[0]);
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << ".";
    EXPECT_TRUE(false);
  } else {
    DatasetIterator di(tree);
    TensorMap tensor_map;
    ASSERT_OK(di.GetNextAsMap(&tensor_map));
    uint64_t i = 0;
    int32_t label = 0;
    while (tensor_map.size() != 0) {
      // Guard the lookup: an unexpected extra row must fail the test, not read past `labels`.
      ASSERT_LT(i, expected_rows);
      ASSERT_OK(tensor_map["label"]->GetItemAt<int32_t>(&label, {}));
      EXPECT_EQ(labels[i], label);
      MS_LOG(DEBUG) << "row:" << i << "\tlabel:" << label << "\n";
      i++;
      ASSERT_OK(di.GetNextAsMap(&tensor_map));
    }
    EXPECT_EQ(i, expected_rows);
  }
}
457