• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020-2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "common/common.h"
17 #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
18 #include "minddata/dataset/engine/ir/datasetops/source/samplers/samplers_ir.h"
19 #include "minddata/dataset/include/dataset/datasets.h"
20 #include <functional>
21 
22 using namespace mindspore::dataset;
23 using mindspore::dataset::Tensor;
24 
25 class MindDataTestPipeline : public UT::DatasetOpTesting {
26  protected:
27 };
28 
// Builds every sampler type once, then runs an ImageFolder -> Repeat -> Batch pipeline
// with the last sampler created (SubsetRandomSampler) and counts the produced batches.
TEST_F(MindDataTestPipeline, TestImageFolderWithSamplers) {
  // Construction smoke checks: each assignment replaces the previous sampler, so only the
  // final SubsetRandomSampler is handed to the dataset below.
  std::shared_ptr<Sampler> sampl = std::make_shared<DistributedSampler>(2, 1);
  EXPECT_NE(sampl, nullptr);

  sampl = std::make_shared<PKSampler>(3);
  EXPECT_NE(sampl, nullptr);

  sampl = std::make_shared<RandomSampler>(false, 12);
  EXPECT_NE(sampl, nullptr);

  sampl = std::make_shared<SequentialSampler>(0, 12);
  EXPECT_NE(sampl, nullptr);

  std::vector<double> weights = {0.9, 0.8, 0.68, 0.7, 0.71, 0.6, 0.5, 0.4, 0.3, 0.5, 0.2, 0.1};
  sampl = std::make_shared<WeightedRandomSampler>(weights, 12);
  EXPECT_NE(sampl, nullptr);

  std::vector<int64_t> indices = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23};
  sampl = std::make_shared<SubsetSampler>(indices);
  EXPECT_NE(sampl, nullptr);

  sampl = std::make_shared<SubsetRandomSampler>(indices);
  EXPECT_NE(sampl, nullptr);

  // Create an ImageFolder Dataset.
  // ASSERT (not EXPECT) is used wherever the pointer is dereferenced right afterwards,
  // so a null result fails the test instead of crashing it.
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl);
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 2;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row; an empty row marks the end of the data.
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t num_batches = 0;
  while (!row.empty()) {
    ++num_batches;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  // 12 sampled indices, repeated twice and batched in pairs, are expected to give 12 batches.
  EXPECT_EQ(num_batches, 12);

  // Manually terminate the pipeline
  iter->Stop();
}
90 
// Runs an ImageFolder pipeline without passing a sampler and checks that the number of
// rows iterated equals the size reported by the dataset.
TEST_F(MindDataTestPipeline, TestNoSamplerSuccess1) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestNoSamplerSuccess1.";
  // Test building a dataset with no sampler provided (defaults to random sampler)

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false);
  // Fatal check: ds is dereferenced on the next statement.
  ASSERT_NE(ds, nullptr);

  // Iterate the dataset and get each row
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // An empty row marks the end of the data.
  uint64_t num_rows = 0;
  while (!row.empty()) {
    ++num_rows;
    auto label = row["label"];
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(num_rows, ds->GetDatasetSize());
  iter->Stop();
}
116 
// Runs an ImageFolder pipeline with a DistributedSampler held in a shared_ptr and
// checks the number of rows delivered to shard 0 of 4.
TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess1) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerSuccess1.";
  // Test basic setting of distributed_sampler

  // num_shards=4, shard_id=0, shuffle=false, num_samples=0, seed=0, offset=-1, even_dist=true
  std::shared_ptr<Sampler> sampler = std::make_shared<DistributedSampler>(4, 0, false, 0, 0, -1, true);
  EXPECT_NE(sampler, nullptr);

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  // Fatal check: ds is dereferenced on the next statement.
  ASSERT_NE(ds, nullptr);

  // Iterate the dataset and get each row
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // An empty row marks the end of the data.
  uint64_t num_rows = 0;
  while (!row.empty()) {
    ++num_rows;
    auto label = row["label"];
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(num_rows, 11);
  iter->Stop();
}
146 
// Same scenario as TestDistributedSamplerSuccess1, but the sampler is handed to
// ImageFolder as a raw pointer to a heap-allocated object.
TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess2) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerSuccess2.";
  // Test basic setting of distributed_sampler

  // num_shards=4, shard_id=0, shuffle=false, num_samples=0, seed=0, offset=-1, even_dist=true
  // The sampler is owned by a unique_ptr so that it is released even when a fatal
  // assertion below returns from the test early; ImageFolder still receives a raw pointer.
  // Note: No need to check for output after calling API class constructor
  std::unique_ptr<DistributedSampler> sampler(new DistributedSampler(4, 0, false, 0, 0, -1, true));

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler.get());
  // Fatal check: ds is dereferenced on the next statement.
  ASSERT_NE(ds, nullptr);

  // Iterate the dataset and get each row
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // An empty row marks the end of the data.
  uint64_t num_rows = 0;
  while (!row.empty()) {
    ++num_rows;
    auto label = row["label"];
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(num_rows, 11);
  iter->Stop();
}
180 
// Same scenario as TestDistributedSamplerSuccess1, but the sampler is a local object
// passed to ImageFolder directly rather than through a pointer.
TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess3) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerSuccess3.";
  // Test basic setting of distributed_sampler

  // num_shards=4, shard_id=0, shuffle=false, num_samples=0, seed=0, offset=-1, even_dist=true
  DistributedSampler sampler = DistributedSampler(4, 0, false, 0, 0, -1, true);

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  // Fatal check: ds is dereferenced on the next statement.
  ASSERT_NE(ds, nullptr);

  // Iterate the dataset and get each row
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // An empty row marks the end of the data.
  uint64_t num_rows = 0;
  while (!row.empty()) {
    ++num_rows;
    auto label = row["label"];
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(num_rows, 11);
  iter->Stop();
}
209 
// Runs a VOC pipeline with a stack-allocated SequentialSampler passed by address.
// NOTE(review): despite the test name, no DistributedSampler is involved here.
TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess4) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerSuccess4.";
  // Test passing a sampler to the dataset API by pointer
  SequentialSampler sampler = SequentialSampler(0, 4);

  // Create a VOC Dataset
  std::string folder_path = datasets_root_path_ + "/testVOC2012_2";
  std::shared_ptr<Dataset> ds = VOC(folder_path, "Detection", "train", {}, false, &sampler);
  // Fatal check: ds is dereferenced on the next statement.
  ASSERT_NE(ds, nullptr);

  // Iterate the dataset and get each row
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // An empty row marks the end of the data.
  uint64_t num_rows = 0;
  while (!row.empty()) {
    ++num_rows;
    auto label = row["label"];
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(num_rows, 4);
  iter->Stop();
}
236 
// Negative test: a DistributedSampler (held in a shared_ptr) with an invalid offset must
// make iterator creation fail.
TEST_F(MindDataTestPipeline, TestDistributedSamplerFail1) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerFail1.";
  // Test invalid setting of distributed_sampler

  // num_shards=4, shard_id=0, shuffle=false, num_samples=0, seed=0, offset=5, even_dist=false
  // offset=5 which is greater than num_shards=4 --> will fail later
  std::shared_ptr<Sampler> sampler = std::make_shared<DistributedSampler>(4, 0, false, 0, 0, 5, false);
  EXPECT_NE(sampler, nullptr);

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  // Fatal check: ds is dereferenced on the next statement.
  ASSERT_NE(ds, nullptr);

  // Iterate will fail because sampler is not initiated successfully.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_EQ(iter, nullptr);
}
255 
// Negative test: same invalid DistributedSampler as TestDistributedSamplerFail1, but
// handed to ImageFolder as a raw pointer to a heap-allocated object.
TEST_F(MindDataTestPipeline, TestDistributedSamplerFail2) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerFail2.";
  // Test invalid setting of distributed_sampler

  // num_shards=4, shard_id=0, shuffle=false, num_samples=0, seed=0, offset=5, even_dist=false
  // offset=5 which is greater than num_shards=4 --> will fail later
  // The sampler is owned by a unique_ptr so that it is released on every exit path,
  // including an early return from the fatal assertion below.
  // Note: No need to check for output after calling API class constructor
  std::unique_ptr<DistributedSampler> sampler(new DistributedSampler(4, 0, false, 0, 0, 5, false));

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler.get());
  // Fatal check: ds is dereferenced on the next statement.
  ASSERT_NE(ds, nullptr);

  // Iterate will fail because sampler is not initiated successfully.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_EQ(iter, nullptr);
}
278 
// Negative test: same invalid DistributedSampler as TestDistributedSamplerFail1, but
// passed to ImageFolder as a local object rather than through a pointer.
TEST_F(MindDataTestPipeline, TestDistributedSamplerFail3) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerFail3.";
  // Test invalid setting of distributed_sampler

  // num_shards=4, shard_id=0, shuffle=false, num_samples=0, seed=0, offset=5, even_dist=false
  // offset=5 which is greater than num_shards=4 --> will fail later
  DistributedSampler sampler = DistributedSampler(4, 0, false, 0, 0, 5, false);

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  // Fatal check: ds is dereferenced on the next statement.
  ASSERT_NE(ds, nullptr);

  // Iterate will fail because sampler is not initiated successfully.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_EQ(iter, nullptr);
}
296 
// Chains a SequentialSampler as the child of a DistributedSampler, runs an ImageFolder
// pipeline to completion and checks the dataset size reported for the chained sampler.
TEST_F(MindDataTestPipeline, TestSamplerAddChild) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSamplerAddChild.";

  // Parent sampler: a single shard, with 5 in the num_samples position.
  auto sampler = std::make_shared<DistributedSampler>(1, 0, false, 5, 0, -1, true);
  EXPECT_NE(sampler, nullptr);

  auto child_sampler = std::make_shared<SequentialSampler>();
  EXPECT_NE(child_sampler, nullptr);

  sampler->AddChild(child_sampler);

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
  // Fatal check: ds is dereferenced on the next statement.
  ASSERT_NE(ds, nullptr);

  // Iterate the dataset and get each row
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // Drain the pipeline; an empty row marks the end of the data.
  // NOTE(review): num_rows is counted but never asserted; only GetDatasetSize() is checked.
  // Consider asserting it as well once the expected row count is confirmed.
  uint64_t num_rows = 0;
  while (!row.empty()) {
    ++num_rows;
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(ds->GetDatasetSize(), 5);
  iter->Stop();
}
328 
// Runs an ImageFolder pipeline with a SubsetSampler built from six indices and no
// explicit num_samples, and expects one row per index.
TEST_F(MindDataTestPipeline, TestSubsetSamplerSuccess1) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSubsetSamplerSuccess1.";
  // Test basic setting of subset_sampler with default num_samples

  std::vector<int64_t> indices = {2, 4, 6, 8, 10, 12};
  std::shared_ptr<Sampler> sampl = std::make_shared<SubsetSampler>(indices);
  EXPECT_NE(sampl, nullptr);

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl);
  // Fatal check: ds is dereferenced on the next statement.
  ASSERT_NE(ds, nullptr);

  // Iterate the dataset and get each row
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // An empty row marks the end of the data.
  uint64_t num_rows = 0;
  while (!row.empty()) {
    ++num_rows;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(num_rows, 6);
  iter->Stop();
}
359 
// Runs an ImageFolder pipeline with a SubsetSampler built from six indices and
// num_samples=3, and expects only three rows.
TEST_F(MindDataTestPipeline, TestSubsetSamplerSuccess2) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSubsetSamplerSuccess2.";
  // Test subset_sampler with num_samples

  std::vector<int64_t> indices = {2, 4, 6, 8, 10, 12};
  std::shared_ptr<Sampler> sampl = std::make_shared<SubsetSampler>(indices, 3);
  EXPECT_NE(sampl, nullptr);

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl);
  // Fatal check: ds is dereferenced on the next statement.
  ASSERT_NE(ds, nullptr);

  // Iterate the dataset and get each row
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // An empty row marks the end of the data.
  uint64_t num_rows = 0;
  while (!row.empty()) {
    ++num_rows;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(num_rows, 3);
  iter->Stop();
}
390 
// Runs an ImageFolder pipeline with a SubsetSampler whose num_samples (8) exceeds the
// number of indices (6), and expects the row count to be capped at six.
TEST_F(MindDataTestPipeline, TestSubsetSamplerSuccess3) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSubsetSamplerSuccess3.";
  // Test subset_sampler with num_samples larger than the indices size.

  std::vector<int64_t> indices = {2, 4, 6, 8, 10, 12};
  std::shared_ptr<Sampler> sampl = std::make_shared<SubsetSampler>(indices, 8);
  EXPECT_NE(sampl, nullptr);

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl);
  // Fatal check: ds is dereferenced on the next statement.
  ASSERT_NE(ds, nullptr);

  // Iterate the dataset and get each row
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // An empty row marks the end of the data.
  uint64_t num_rows = 0;
  while (!row.empty()) {
    ++num_rows;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(num_rows, 6);
  iter->Stop();
}
421 
// Negative test: a SubsetSampler containing an out-of-range index must make the first
// row fetch report an error, even though the dataset and iterator are created.
TEST_F(MindDataTestPipeline, TestSubsetSamplerFail) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSubsetSamplerFail.";
  // Test subset_sampler with index out of bounds.

  std::vector<int64_t> indices = {2, 4, 6, 8, 10, 100};  // Sample ID (100) is out of bound
  std::shared_ptr<Sampler> sampl = std::make_shared<SubsetSampler>(indices);
  EXPECT_NE(sampl, nullptr);

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl);
  // Fatal check: ds is dereferenced on the next statement.
  ASSERT_NE(ds, nullptr);

  // Create the iterator; the failure is only expected when a row is fetched.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);
  std::unordered_map<std::string, mindspore::MSTensor> row;
  // Expect failure: index 100 is out of dataset bounds
  EXPECT_ERROR(iter->GetNextRow(&row));

  iter->Stop();
}
444