1 /**
2 * Copyright 2020-2021 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
#include "common/common.h"
#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
#include "minddata/dataset/engine/ir/datasetops/source/samplers/samplers_ir.h"
#include "minddata/dataset/include/dataset/datasets.h"
#include <functional>
#include <memory>
21
22 using namespace mindspore::dataset;
23 using mindspore::dataset::Tensor;
24
// Test fixture used by every TEST_F in this file. It declares no members of its own;
// everything the tests rely on comes from UT::DatasetOpTesting.
// NOTE(review): datasets_root_path_, used by the tests below, is presumably provided by the
// base class -- confirm against common/common.h.
class MindDataTestPipeline : public UT::DatasetOpTesting {
 protected:
};
28
// Constructs one instance of each sampler type, then runs an
// ImageFolder -> Repeat(2) -> Batch(2) pipeline driven by the last sampler created
// (a SubsetRandomSampler over 12 indices) and counts the batches produced.
TEST_F(MindDataTestPipeline, TestImageFolderWithSamplers) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestImageFolderWithSamplers.";

  // Each sampler type is constructed in turn; only the final one is handed to the dataset.
  std::shared_ptr<Sampler> sampl = std::make_shared<DistributedSampler>(2, 1);
  EXPECT_NE(sampl, nullptr);

  sampl = std::make_shared<PKSampler>(3);
  EXPECT_NE(sampl, nullptr);

  sampl = std::make_shared<RandomSampler>(false, 12);
  EXPECT_NE(sampl, nullptr);

  sampl = std::make_shared<SequentialSampler>(0, 12);
  EXPECT_NE(sampl, nullptr);

  std::vector<double> weights = {0.9, 0.8, 0.68, 0.7, 0.71, 0.6, 0.5, 0.4, 0.3, 0.5, 0.2, 0.1};
  sampl = std::make_shared<WeightedRandomSampler>(weights, 12);
  EXPECT_NE(sampl, nullptr);

  std::vector<int64_t> indices = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23};
  sampl = std::make_shared<SubsetSampler>(indices);
  EXPECT_NE(sampl, nullptr);

  sampl = std::make_shared<SubsetRandomSampler>(indices);
  EXPECT_NE(sampl, nullptr);

  // Create an ImageFolder Dataset.
  // Every pointer that is dereferenced afterwards is checked with a fatal assertion, so a
  // null result fails this test instead of crashing the whole test binary.
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl);
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 2;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row; an empty row marks the end of the data.
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (!row.empty()) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  // 12 sampled indices, repeated twice, grouped into batches of 2.
  EXPECT_EQ(i, 12);

  // Manually terminate the pipeline
  iter->Stop();
}
90
// Builds an ImageFolder dataset without passing a sampler and checks that the number of
// rows produced by iteration matches what GetDatasetSize() reports.
TEST_F(MindDataTestPipeline, TestNoSamplerSuccess1) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestNoSamplerSuccess1.";
  // Test building a dataset with no sampler provided (defaults to random sampler)

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false);
  // Fatal assertion: ds is dereferenced on the next statement.
  ASSERT_NE(ds, nullptr);

  // Iterate the dataset and get each row
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // Count rows until an empty row signals the end of the data.
  uint64_t i = 0;
  while (!row.empty()) {
    i++;
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, ds->GetDatasetSize());
  iter->Stop();
}
116
// Passes a DistributedSampler held by std::shared_ptr to ImageFolder and expects
// shard 0 of 4 to yield 11 rows.
TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess1) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerSuccess1.";
  // Test basic setting of distributed_sampler

  // num_shards=4, shard_id=0, shuffle=false, num_samples=0, seed=0, offset=-1, even_dist=true
  std::shared_ptr<Sampler> sampler = std::make_shared<DistributedSampler>(4, 0, false, 0, 0, -1, true);
  EXPECT_NE(sampler, nullptr);

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  // Fatal assertion: ds is dereferenced on the next statement.
  ASSERT_NE(ds, nullptr);

  // Iterate the dataset and get each row
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // Count rows until an empty row signals the end of the data.
  uint64_t i = 0;
  while (!row.empty()) {
    i++;
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 11);
  iter->Stop();
}
146
// Passes a heap-allocated DistributedSampler to ImageFolder as a raw pointer and expects
// shard 0 of 4 to yield 11 rows.
TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess2) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerSuccess2.";
  // Test basic setting of distributed_sampler

  // num_shards=4, shard_id=0, shuffle=false, num_samples=0, seed=0, offset=-1, even_dist=true
  // The sampler is owned by a unique_ptr so that it is released even when one of the fatal
  // assertions below returns from the test early; the dataset API still receives a raw pointer.
  // Being declared first, it is destroyed after ds and iter.
  // Note: No need to check for output after calling API class constructor
  auto sampler = std::make_unique<DistributedSampler>(4, 0, false, 0, 0, -1, true);

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler.get());
  // Fatal assertion: ds is dereferenced on the next statement.
  ASSERT_NE(ds, nullptr);

  // Iterate the dataset and get each row
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // Count rows until an empty row signals the end of the data.
  uint64_t i = 0;
  while (!row.empty()) {
    i++;
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 11);
  iter->Stop();
}
180
// Passes a stack-allocated DistributedSampler object directly to ImageFolder and expects
// shard 0 of 4 to yield 11 rows.
TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess3) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerSuccess3.";
  // Test basic setting of distributed_sampler

  // num_shards=4, shard_id=0, shuffle=false, num_samples=0, seed=0, offset=-1, even_dist=true
  DistributedSampler sampler = DistributedSampler(4, 0, false, 0, 0, -1, true);

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  // Fatal assertion: ds is dereferenced on the next statement.
  ASSERT_NE(ds, nullptr);

  // Iterate the dataset and get each row
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // Count rows until an empty row signals the end of the data.
  uint64_t i = 0;
  while (!row.empty()) {
    i++;
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 11);
  iter->Stop();
}
209
// Passes the address of a stack-allocated SequentialSampler (start index 0, 4 samples) to a
// VOC dataset and expects 4 rows.
// NOTE(review): despite the test name, no DistributedSampler is involved here.
TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess4) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerSuccess4.";
  // Test passing a sampler by raw pointer
  SequentialSampler sampler = SequentialSampler(0, 4);

  // Create a VOC Dataset
  std::string folder_path = datasets_root_path_ + "/testVOC2012_2";
  std::shared_ptr<Dataset> ds = VOC(folder_path, "Detection", "train", {}, false, &sampler);
  // Fatal assertion: ds is dereferenced on the next statement.
  ASSERT_NE(ds, nullptr);

  // Iterate the dataset and get each row
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // Count rows until an empty row signals the end of the data.
  uint64_t i = 0;
  while (!row.empty()) {
    i++;
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 4);
  iter->Stop();
}
236
// Passes an invalid DistributedSampler (held by std::shared_ptr) to ImageFolder and expects
// iterator creation to fail.
TEST_F(MindDataTestPipeline, TestDistributedSamplerFail1) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerFail1.";
  // Test basic setting of distributed_sampler

  // num_shards=4, shard_id=0, shuffle=false, num_samples=0, seed=0, offset=5, even_dist=false
  // offset=5 which is greater than num_shards=4 --> will fail later
  std::shared_ptr<Sampler> sampler = std::make_shared<DistributedSampler>(4, 0, false, 0, 0, 5, false);
  EXPECT_NE(sampler, nullptr);

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  // Fatal assertion: ds is dereferenced on the next statement.
  ASSERT_NE(ds, nullptr);

  // Iterate will fail because sampler is not initiated successfully.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_EQ(iter, nullptr);
}
255
// Passes an invalid heap-allocated DistributedSampler to ImageFolder as a raw pointer and
// expects iterator creation to fail.
TEST_F(MindDataTestPipeline, TestDistributedSamplerFail2) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerFail2.";
  // Test basic setting of distributed_sampler

  // num_shards=4, shard_id=0, shuffle=false, num_samples=0, seed=0, offset=5, even_dist=false
  // offset=5 which is greater than num_shards=4 --> will fail later
  // The sampler is owned by a unique_ptr so that it is released even if the fatal assertion
  // on ds returns from the test early; the dataset API still receives a raw pointer.
  // Note: No need to check for output after calling API class constructor
  auto sampler = std::make_unique<DistributedSampler>(4, 0, false, 0, 0, 5, false);

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler.get());
  // Fatal assertion: ds is dereferenced on the next statement.
  ASSERT_NE(ds, nullptr);

  // Iterate will fail because sampler is not initiated successfully.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_EQ(iter, nullptr);
}
278
// Passes an invalid stack-allocated DistributedSampler object directly to ImageFolder and
// expects iterator creation to fail.
TEST_F(MindDataTestPipeline, TestDistributedSamplerFail3) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerFail3.";
  // Test basic setting of distributed_sampler

  // num_shards=4, shard_id=0, shuffle=false, num_samples=0, seed=0, offset=5, even_dist=false
  // offset=5 which is greater than num_shards=4 --> will fail later
  DistributedSampler sampler = DistributedSampler(4, 0, false, 0, 0, 5, false);

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  // Fatal assertion: ds is dereferenced on the next statement.
  ASSERT_NE(ds, nullptr);

  // Iterate will fail because sampler is not initiated successfully.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_EQ(iter, nullptr);
}
296
// Attaches a SequentialSampler as the child of a DistributedSampler, runs an ImageFolder
// pipeline with the combined sampler to completion, and checks the reported dataset size.
TEST_F(MindDataTestPipeline, TestSamplerAddChild) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSamplerAddChild.";

  // num_shards=1, shard_id=0, shuffle=false, num_samples=5, seed=0, offset=-1, even_dist=true
  auto sampler = std::make_shared<DistributedSampler>(1, 0, false, 5, 0, -1, true);
  EXPECT_NE(sampler, nullptr);

  auto child_sampler = std::make_shared<SequentialSampler>();
  EXPECT_NE(child_sampler, nullptr);

  sampler->AddChild(child_sampler);

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  // Fatal assertion: ds is dereferenced on the next statement.
  ASSERT_NE(ds, nullptr);

  // Iterate the dataset and get each row
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // Drain the pipeline; only the reported dataset size is verified below.
  while (!row.empty()) {
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(ds->GetDatasetSize(), 5);
  iter->Stop();
}
328
// Runs ImageFolder with a SubsetSampler over 6 indices and no explicit num_samples,
// expecting one row per index.
TEST_F(MindDataTestPipeline, TestSubsetSamplerSuccess1) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSubsetSamplerSuccess1.";
  // Test basic setting of subset_sampler with default num_samples

  std::vector<int64_t> indices = {2, 4, 6, 8, 10, 12};
  std::shared_ptr<Sampler> sampl = std::make_shared<SubsetSampler>(indices);
  EXPECT_NE(sampl, nullptr);

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl);
  // Fatal assertion: ds is dereferenced on the next statement.
  ASSERT_NE(ds, nullptr);

  // Iterate the dataset and get each row
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // Count rows until an empty row signals the end of the data.
  uint64_t i = 0;
  while (!row.empty()) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 6);
  iter->Stop();
}
359
// Runs ImageFolder with a SubsetSampler over 6 indices limited to num_samples=3,
// expecting 3 rows.
TEST_F(MindDataTestPipeline, TestSubsetSamplerSuccess2) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSubsetSamplerSuccess2.";
  // Test subset_sampler with num_samples

  std::vector<int64_t> indices = {2, 4, 6, 8, 10, 12};
  std::shared_ptr<Sampler> sampl = std::make_shared<SubsetSampler>(indices, 3);
  EXPECT_NE(sampl, nullptr);

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl);
  // Fatal assertion: ds is dereferenced on the next statement.
  ASSERT_NE(ds, nullptr);

  // Iterate the dataset and get each row
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // Count rows until an empty row signals the end of the data.
  uint64_t i = 0;
  while (!row.empty()) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 3);
  iter->Stop();
}
390
// Runs ImageFolder with a SubsetSampler whose num_samples (8) exceeds the number of
// indices (6), expecting the row count to be capped at 6.
TEST_F(MindDataTestPipeline, TestSubsetSamplerSuccess3) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSubsetSamplerSuccess3.";
  // Test subset_sampler with num_samples larger than the indices size.

  std::vector<int64_t> indices = {2, 4, 6, 8, 10, 12};
  std::shared_ptr<Sampler> sampl = std::make_shared<SubsetSampler>(indices, 8);
  EXPECT_NE(sampl, nullptr);

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl);
  // Fatal assertion: ds is dereferenced on the next statement.
  ASSERT_NE(ds, nullptr);

  // Iterate the dataset and get each row
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // Count rows until an empty row signals the end of the data.
  uint64_t i = 0;
  while (!row.empty()) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 6);
  iter->Stop();
}
421
// Runs ImageFolder with a SubsetSampler containing an out-of-range sample ID and expects
// fetching the first row to report an error.
TEST_F(MindDataTestPipeline, TestSubsetSamplerFail) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSubsetSamplerFail.";
  // Test subset_sampler with index out of bounds.

  std::vector<int64_t> indices = {2, 4, 6, 8, 10, 100};  // Sample ID (100) is out of bound
  std::shared_ptr<Sampler> sampl = std::make_shared<SubsetSampler>(indices);
  EXPECT_NE(sampl, nullptr);

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl);
  // Fatal assertion: ds is dereferenced on the next statement.
  ASSERT_NE(ds, nullptr);

  // Iterate the dataset and get each row
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);
  std::unordered_map<std::string, mindspore::MSTensor> row;
  // Expect failure: index 100 is out of dataset bounds
  EXPECT_ERROR(iter->GetNextRow(&row));

  iter->Stop();
}
444