1 /** 2 * Copyright 2020-2021 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include "common/common.h" 17 #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" 18 #include "minddata/dataset/engine/ir/datasetops/source/samplers/samplers_ir.h" 19 #include "minddata/dataset/include/dataset/datasets.h" 20 #include <functional> 21 22 using namespace mindspore::dataset; 23 using mindspore::dataset::Tensor; 24 25 class MindDataTestPipeline : public UT::DatasetOpTesting { 26 protected: 27 }; 28 29 TEST_F(MindDataTestPipeline, TestImageFolderWithSamplers) { 30 std::shared_ptr<Sampler> sampl = std::make_shared<DistributedSampler>(2, 1); 31 EXPECT_NE(sampl, nullptr); 32 33 sampl = std::make_shared<PKSampler>(3); 34 EXPECT_NE(sampl, nullptr); 35 36 sampl = std::make_shared<RandomSampler>(false, 12); 37 EXPECT_NE(sampl, nullptr); 38 39 sampl = std::make_shared<SequentialSampler>(0, 12); 40 EXPECT_NE(sampl, nullptr); 41 42 std::vector<double> weights = {0.9, 0.8, 0.68, 0.7, 0.71, 0.6, 0.5, 0.4, 0.3, 0.5, 0.2, 0.1}; 43 sampl = std::make_shared<WeightedRandomSampler>(weights, 12); 44 EXPECT_NE(sampl, nullptr); 45 46 std::vector<int64_t> indices = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23}; 47 sampl = std::make_shared<SubsetSampler>(indices); 48 EXPECT_NE(sampl, nullptr); 49 50 sampl = std::make_shared<SubsetRandomSampler>(indices); 51 EXPECT_NE(sampl, nullptr); 52 53 // Create an ImageFolder Dataset 54 std::string folder_path = datasets_root_path_ + "/testPK/data/"; 55 std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl); 56 EXPECT_NE(ds, nullptr); 57 58 // Create a Repeat operation on ds 59 int32_t repeat_num = 2; 60 ds = ds->Repeat(repeat_num); 61 EXPECT_NE(ds, nullptr); 62 63 // Create a Batch operation on ds 64 int32_t batch_size = 2; 65 ds = ds->Batch(batch_size); 66 EXPECT_NE(ds, nullptr); 67 68 // Create an iterator over the result of the above dataset 69 // This will trigger the creation of the Execution Tree and launch it. 70 std::shared_ptr<Iterator> iter = ds->CreateIterator(); 71 EXPECT_NE(iter, nullptr); 72 73 // Iterate the dataset and get each row 74 std::unordered_map<std::string, mindspore::MSTensor> row; 75 ASSERT_OK(iter->GetNextRow(&row)); 76 77 uint64_t i = 0; 78 while (row.size() != 0) { 79 i++; 80 auto image = row["image"]; 81 MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); 82 ASSERT_OK(iter->GetNextRow(&row)); 83 } 84 85 EXPECT_EQ(i, 12); 86 87 // Manually terminate the pipeline 88 iter->Stop(); 89 } 90 91 TEST_F(MindDataTestPipeline, TestNoSamplerSuccess1) { 92 MS_LOG(INFO) << "Doing MindDataTestPipeline-TestNoSamplerSuccess1."; 93 // Test building a dataset with no sampler provided (defaults to random sampler 94 95 // Create an ImageFolder Dataset 96 std::string folder_path = datasets_root_path_ + "/testPK/data/"; 97 std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false); 98 EXPECT_NE(ds, nullptr); 99 100 // Iterate the dataset and get each row 101 std::shared_ptr<Iterator> iter = ds->CreateIterator(); 102 EXPECT_NE(iter, nullptr); 103 std::unordered_map<std::string, mindspore::MSTensor> row; 104 ASSERT_OK(iter->GetNextRow(&row)); 105 106 uint64_t i = 0; 107 while (row.size() != 0) { 108 i++; 109 auto label = row["label"]; 110 ASSERT_OK(iter->GetNextRow(&row)); 111 } 112 113 EXPECT_EQ(i, ds->GetDatasetSize()); 114 iter->Stop(); 115 } 116 117 TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess1) { 118 MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerSuccess1."; 119 // Test basic setting of distributed_sampler 120 121 // num_shards=4, shard_id=0, shuffle=false, num_samplers=0, seed=0, offset=-1, even_dist=true 122 std::shared_ptr<Sampler> sampler = std::make_shared<DistributedSampler>(4, 0, false, 0, 0, -1, true); 123 EXPECT_NE(sampler, nullptr); 124 125 // Create an ImageFolder Dataset 126 std::string folder_path = datasets_root_path_ + "/testPK/data/"; 127 std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler); 128 EXPECT_NE(ds, nullptr); 129 130 // Iterate the dataset and get each row 131 std::shared_ptr<Iterator> iter = ds->CreateIterator(); 132 EXPECT_NE(iter, nullptr); 133 std::unordered_map<std::string, mindspore::MSTensor> row; 134 ASSERT_OK(iter->GetNextRow(&row)); 135 136 uint64_t i = 0; 137 while (row.size() != 0) { 138 i++; 139 auto label = row["label"]; 140 ASSERT_OK(iter->GetNextRow(&row)); 141 } 142 143 EXPECT_EQ(i, 11); 144 iter->Stop(); 145 } 146 147 TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess2) { 148 MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerSuccess2."; 149 // Test basic setting of distributed_sampler 150 151 // num_shards=4, shard_id=0, shuffle=false, num_samplers=0, seed=0, offset=-1, even_dist=true 152 auto sampler(new DistributedSampler(4, 0, false, 0, 0, -1, true)); 153 // Note that with new, we have to explicitly delete the allocated object as shown below. 154 // Note: No need to check for output after calling API class constructor 155 156 // Create an ImageFolder Dataset 157 std::string folder_path = datasets_root_path_ + "/testPK/data/"; 158 std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler); 159 EXPECT_NE(ds, nullptr); 160 161 // Iterate the dataset and get each row 162 std::shared_ptr<Iterator> iter = ds->CreateIterator(); 163 EXPECT_NE(iter, nullptr); 164 std::unordered_map<std::string, mindspore::MSTensor> row; 165 ASSERT_OK(iter->GetNextRow(&row)); 166 167 uint64_t i = 0; 168 while (row.size() != 0) { 169 i++; 170 auto label = row["label"]; 171 ASSERT_OK(iter->GetNextRow(&row)); 172 } 173 174 EXPECT_EQ(i, 11); 175 iter->Stop(); 176 177 // Delete allocated objects with raw pointers 178 delete sampler; 179 } 180 181 TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess3) { 182 MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerSuccess3."; 183 // Test basic setting of distributed_sampler 184 185 // num_shards=4, shard_id=0, shuffle=false, num_samplers=0, seed=0, offset=-1, even_dist=true 186 DistributedSampler sampler = DistributedSampler(4, 0, false, 0, 0, -1, true); 187 188 // Create an ImageFolder Dataset 189 std::string folder_path = datasets_root_path_ + "/testPK/data/"; 190 std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler); 191 EXPECT_NE(ds, nullptr); 192 193 // Iterate the dataset and get each row 194 std::shared_ptr<Iterator> iter = ds->CreateIterator(); 195 EXPECT_NE(iter, nullptr); 196 std::unordered_map<std::string, mindspore::MSTensor> row; 197 ASSERT_OK(iter->GetNextRow(&row)); 198 199 uint64_t i = 0; 200 while (row.size() != 0) { 201 i++; 202 auto label = row["label"]; 203 ASSERT_OK(iter->GetNextRow(&row)); 204 } 205 206 EXPECT_EQ(i, 11); 207 iter->Stop(); 208 } 209 210 TEST_F(MindDataTestPipeline, TestDistributedSamplerSuccess4) { 211 MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerSuccess4."; 212 // Test pointer of distributed_sampler 213 SequentialSampler sampler = SequentialSampler(0, 4); 214 215 // Create an ImageFolder Dataset 216 std::string folder_path = datasets_root_path_ + "/testVOC2012_2"; 217 std::shared_ptr<Dataset> ds = VOC(folder_path, "Detection", "train", {}, false, &sampler); 218 EXPECT_NE(ds, nullptr); 219 220 // Iterate the dataset and get each row 221 std::shared_ptr<Iterator> iter = ds->CreateIterator(); 222 EXPECT_NE(iter, nullptr); 223 std::unordered_map<std::string, mindspore::MSTensor> row; 224 ASSERT_OK(iter->GetNextRow(&row)); 225 226 uint64_t i = 0; 227 while (row.size() != 0) { 228 i++; 229 auto label = row["label"]; 230 ASSERT_OK(iter->GetNextRow(&row)); 231 } 232 233 EXPECT_EQ(i, 4); 234 iter->Stop(); 235 } 236 237 TEST_F(MindDataTestPipeline, TestDistributedSamplerFail1) { 238 MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerFail1."; 239 // Test basic setting of distributed_sampler 240 241 // num_shards=4, shard_id=0, shuffle=false, num_samplers=0, seed=0, offset=5, even_dist=true 242 // offset=5 which is greater than num_shards=4 --> will fail later 243 std::shared_ptr<Sampler> sampler = std::make_shared<DistributedSampler>(4, 0, false, 0, 0, 5, false); 244 EXPECT_NE(sampler, nullptr); 245 246 // Create an ImageFolder Dataset 247 std::string folder_path = datasets_root_path_ + "/testPK/data/"; 248 std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler); 249 EXPECT_NE(ds, nullptr); 250 251 // Iterate will fail because sampler is not initiated successfully. 252 std::shared_ptr<Iterator> iter = ds->CreateIterator(); 253 EXPECT_EQ(iter, nullptr); 254 } 255 256 TEST_F(MindDataTestPipeline, TestDistributedSamplerFail2) { 257 MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerFail2."; 258 // Test basic setting of distributed_sampler 259 260 // num_shards=4, shard_id=0, shuffle=false, num_samplers=0, seed=0, offset=5, even_dist=true 261 // offset=5 which is greater than num_shards=4 --> will fail later 262 auto sampler(new DistributedSampler(4, 0, false, 0, 0, 5, false)); 263 // Note that with new, we have to explicitly delete the allocated object as shown below. 264 // Note: No need to check for output after calling API class constructor 265 266 // Create an ImageFolder Dataset 267 std::string folder_path = datasets_root_path_ + "/testPK/data/"; 268 std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler); 269 EXPECT_NE(ds, nullptr); 270 271 // Iterate will fail because sampler is not initiated successfully. 272 std::shared_ptr<Iterator> iter = ds->CreateIterator(); 273 EXPECT_EQ(iter, nullptr); 274 275 // Delete allocated objects with raw pointers 276 delete sampler; 277 } 278 279 TEST_F(MindDataTestPipeline, TestDistributedSamplerFail3) { 280 MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedSamplerFail3."; 281 // Test basic setting of distributed_sampler 282 283 // num_shards=4, shard_id=0, shuffle=false, num_samplers=0, seed=0, offset=5, even_dist=true 284 // offset=5 which is greater than num_shards=4 --> will fail later 285 DistributedSampler sampler = DistributedSampler(4, 0, false, 0, 0, 5, false); 286 287 // Create an ImageFolder Dataset 288 std::string folder_path = datasets_root_path_ + "/testPK/data/"; 289 std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler); 290 EXPECT_NE(ds, nullptr); 291 292 // Iterate will fail because sampler is not initiated successfully. 293 std::shared_ptr<Iterator> iter = ds->CreateIterator(); 294 EXPECT_EQ(iter, nullptr); 295 } 296 297 TEST_F(MindDataTestPipeline, TestSamplerAddChild) { 298 MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSamplerAddChild."; 299 300 auto sampler = std::make_shared<DistributedSampler>(1, 0, false, 5, 0, -1, true); 301 EXPECT_NE(sampler, nullptr); 302 303 auto child_sampler = std::make_shared<SequentialSampler>(); 304 EXPECT_NE(child_sampler, nullptr); 305 306 sampler->AddChild(child_sampler); 307 308 // Create an ImageFolder Dataset 309 std::string folder_path = datasets_root_path_ + "/testPK/data/"; 310 std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler); 311 EXPECT_NE(ds, nullptr); 312 313 // Iterate the dataset and get each row 314 std::shared_ptr<Iterator> iter = ds->CreateIterator(); 315 EXPECT_NE(iter, nullptr); 316 std::unordered_map<std::string, mindspore::MSTensor> row; 317 ASSERT_OK(iter->GetNextRow(&row)); 318 319 uint64_t i = 0; 320 while (row.size() != 0) { 321 i++; 322 ASSERT_OK(iter->GetNextRow(&row)); 323 } 324 325 EXPECT_EQ(ds->GetDatasetSize(), 5); 326 iter->Stop(); 327 } 328 329 TEST_F(MindDataTestPipeline, TestSubsetSamplerSuccess1) { 330 MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSubsetSamplerSuccess1."; 331 // Test basic setting of subset_sampler with default num_samples 332 333 std::vector<int64_t> indices = {2, 4, 6, 8, 10, 12}; 334 std::shared_ptr<Sampler> sampl = std::make_shared<SubsetSampler>(indices); 335 EXPECT_NE(sampl, nullptr); 336 337 // Create an ImageFolder Dataset 338 std::string folder_path = datasets_root_path_ + "/testPK/data/"; 339 std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl); 340 EXPECT_NE(ds, nullptr); 341 342 // Iterate the dataset and get each row 343 std::shared_ptr<Iterator> iter = ds->CreateIterator(); 344 EXPECT_NE(iter, nullptr); 345 std::unordered_map<std::string, mindspore::MSTensor> row; 346 ASSERT_OK(iter->GetNextRow(&row)); 347 348 uint64_t i = 0; 349 while (row.size() != 0) { 350 i++; 351 auto image = row["image"]; 352 MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); 353 ASSERT_OK(iter->GetNextRow(&row)); 354 } 355 356 EXPECT_EQ(i, 6); 357 iter->Stop(); 358 } 359 360 TEST_F(MindDataTestPipeline, TestSubsetSamplerSuccess2) { 361 MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSubsetSamplerSuccess2."; 362 // Test subset_sampler with num_samples 363 364 std::vector<int64_t> indices = {2, 4, 6, 8, 10, 12}; 365 std::shared_ptr<Sampler> sampl = std::make_shared<SubsetSampler>(indices, 3); 366 EXPECT_NE(sampl, nullptr); 367 368 // Create an ImageFolder Dataset 369 std::string folder_path = datasets_root_path_ + "/testPK/data/"; 370 std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl); 371 EXPECT_NE(ds, nullptr); 372 373 // Iterate the dataset and get each row 374 std::shared_ptr<Iterator> iter = ds->CreateIterator(); 375 EXPECT_NE(iter, nullptr); 376 std::unordered_map<std::string, mindspore::MSTensor> row; 377 ASSERT_OK(iter->GetNextRow(&row)); 378 379 uint64_t i = 0; 380 while (row.size() != 0) { 381 i++; 382 auto image = row["image"]; 383 MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); 384 ASSERT_OK(iter->GetNextRow(&row)); 385 } 386 387 EXPECT_EQ(i, 3); 388 iter->Stop(); 389 } 390 391 TEST_F(MindDataTestPipeline, TestSubsetSamplerSuccess3) { 392 MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSubsetSamplerSuccess3."; 393 // Test subset_sampler with num_samples larger than the indices size. 394 395 std::vector<int64_t> indices = {2, 4, 6, 8, 10, 12}; 396 std::shared_ptr<Sampler> sampl = std::make_shared<SubsetSampler>(indices, 8); 397 EXPECT_NE(sampl, nullptr); 398 399 // Create an ImageFolder Dataset 400 std::string folder_path = datasets_root_path_ + "/testPK/data/"; 401 std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl); 402 EXPECT_NE(ds, nullptr); 403 404 // Iterate the dataset and get each row 405 std::shared_ptr<Iterator> iter = ds->CreateIterator(); 406 EXPECT_NE(iter, nullptr); 407 std::unordered_map<std::string, mindspore::MSTensor> row; 408 ASSERT_OK(iter->GetNextRow(&row)); 409 410 uint64_t i = 0; 411 while (row.size() != 0) { 412 i++; 413 auto image = row["image"]; 414 MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); 415 ASSERT_OK(iter->GetNextRow(&row)); 416 } 417 418 EXPECT_EQ(i, 6); 419 iter->Stop(); 420 } 421 422 TEST_F(MindDataTestPipeline, TestSubsetSamplerFail) { 423 MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSubsetSamplerFail."; 424 // Test subset_sampler with index out of bounds. 425 426 std::vector<int64_t> indices = {2, 4, 6, 8, 10, 100}; // Sample ID (100) is out of bound 427 std::shared_ptr<Sampler> sampl = std::make_shared<SubsetSampler>(indices); 428 EXPECT_NE(sampl, nullptr); 429 430 // Create an ImageFolder Dataset 431 std::string folder_path = datasets_root_path_ + "/testPK/data/"; 432 std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampl); 433 EXPECT_NE(ds, nullptr); 434 435 // Iterate the dataset and get each row 436 std::shared_ptr<Iterator> iter = ds->CreateIterator(); 437 EXPECT_NE(iter, nullptr); 438 std::unordered_map<std::string, mindspore::MSTensor> row; 439 // Expect failure: index 100 is out of dataset bounds 440 EXPECT_ERROR(iter->GetNextRow(&row)); 441 442 iter->Stop(); 443 } 444