1 /** 2 * Copyright 2020-2021 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include "common/common.h" 17 #include "minddata/dataset/include/dataset/datasets.h" 18 #include "minddata/dataset/include/dataset/vision.h" 19 20 using namespace mindspore::dataset; 21 22 // Helper function to get the session id from SESSION_ID env variable 23 Status GetSessionFromEnv(session_id_type *session_id); 24 25 class MindDataTestCacheOp : public UT::DatasetOpTesting { 26 public: 27 void SetUp() override { DatasetOpTesting::SetUp(); } 28 }; 29 30 TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCApiSamplerNull) { 31 session_id_type env_session; 32 Status s = GetSessionFromEnv(&env_session); 33 EXPECT_EQ(s, Status::OK()); 34 35 std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false, "127.0.0.1", 50053, 1, 1); 36 EXPECT_NE(some_cache, nullptr); 37 38 // Create an ImageFolder Dataset, this folder_path only has 2 images in it 39 std::string folder_path = datasets_root_path_ + "/testImageNetData/train/"; 40 std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, nullptr, {}, {}, some_cache); 41 EXPECT_NE(ds, nullptr); 42 43 // Create an iterator over the result of the above dataset 44 // This will trigger the creation of the Execution Tree and launch it. 45 // Now the parameter check for ImageFolderNode would fail and we would end up with a nullptr iter. 46 std::shared_ptr<Iterator> iter = ds->CreateIterator(); 47 EXPECT_EQ(iter, nullptr); 48 } 49 50 TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCApiNestedCache) { 51 session_id_type env_session; 52 Status s = GetSessionFromEnv(&env_session); 53 EXPECT_EQ(s, Status::OK()); 54 55 std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false); 56 EXPECT_NE(some_cache, nullptr); 57 58 // Create an ImageFolder Dataset, this folder_path only has 2 images in it 59 std::string folder_path = datasets_root_path_ + "/testImageNetData/train/"; 60 std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, std::make_shared<RandomSampler>(), {}, {}, some_cache); 61 EXPECT_NE(ds, nullptr); 62 63 // Create objects for the tensor ops 64 std::shared_ptr<TensorTransform> decode_op = std::make_shared<vision::Decode>(); 65 EXPECT_NE(decode_op, nullptr); 66 67 // Create a Map operation on ds 68 ds = ds->Map({decode_op}, {}, {}, {"image"}, some_cache); 69 EXPECT_NE(ds, nullptr); 70 71 // Create an iterator over the result of the above dataset 72 // This will trigger the creation of the Execution Tree and launch it. 73 // Now in the cache_error_pass would fail and we would end up with a nullptr iter. 74 std::shared_ptr<Iterator> iter = ds->CreateIterator(); 75 EXPECT_EQ(iter, nullptr); 76 } 77 78 TEST_F(MindDataTestCacheOp, DISABLED_TestCacheImageFolderCApi) { 79 session_id_type env_session; 80 Status s = GetSessionFromEnv(&env_session); 81 EXPECT_EQ(s, Status::OK()); 82 83 std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false); 84 EXPECT_NE(some_cache, nullptr); 85 86 // Create an ImageFolder Dataset, this folder_path only has 2 images in it 87 std::string folder_path = datasets_root_path_ + "/testImageNetData/train/"; 88 std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, std::make_shared<RandomSampler>(), {}, {}, some_cache); 89 EXPECT_NE(ds, nullptr); 90 91 // Create a Repeat operation on ds 92 int32_t repeat_num = 2; 93 ds = ds->Repeat(repeat_num); 94 EXPECT_NE(ds, nullptr); 95 96 // Create an iterator over the result of the above dataset 97 // This will trigger the creation of the Execution Tree and launch it. 98 std::shared_ptr<Iterator> iter = ds->CreateIterator(); 99 EXPECT_NE(iter, nullptr); 100 101 // Iterate the dataset and get each row 102 std::unordered_map<std::string, mindspore::MSTensor> row; 103 ASSERT_OK(iter->GetNextRow(&row)); 104 105 uint64_t i = 0; 106 while (row.size() != 0) { 107 i++; 108 auto image = row["image"]; 109 MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); 110 ASSERT_OK(iter->GetNextRow(&row)); 111 } 112 113 EXPECT_EQ(i, 4); 114 115 // Manually terminate the pipeline 116 iter->Stop(); 117 } 118 119 TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCocoCApi) { 120 session_id_type env_session; 121 Status s = GetSessionFromEnv(&env_session); 122 EXPECT_EQ(s, Status::OK()); 123 124 std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false); 125 EXPECT_NE(some_cache, nullptr); 126 127 // Create a Coco Dataset, this folder_path has 6 images in it 128 std::string folder_path = datasets_root_path_ + "/testCOCO/train/"; 129 std::string annotation_file_path = datasets_root_path_ + "/testCOCO/annotations/train.json"; 130 std::shared_ptr<Dataset> ds = 131 Coco(folder_path, annotation_file_path, "Detection", false, std::make_shared<RandomSampler>(), some_cache); 132 EXPECT_NE(ds, nullptr); 133 134 // Create a Repeat operation on ds 135 int32_t repeat_num = 2; 136 ds = ds->Repeat(repeat_num); 137 EXPECT_NE(ds, nullptr); 138 139 // Create an iterator over the result of the above dataset 140 // This will trigger the creation of the Execution Tree and launch it. 141 std::shared_ptr<Iterator> iter = ds->CreateIterator(); 142 EXPECT_NE(iter, nullptr); 143 144 // Iterate the dataset and get each row 145 std::unordered_map<std::string, mindspore::MSTensor> row; 146 ASSERT_OK(iter->GetNextRow(&row)); 147 148 uint64_t i = 0; 149 while (row.size() != 0) { 150 i++; 151 auto image = row["image"]; 152 MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); 153 ASSERT_OK(iter->GetNextRow(&row)); 154 } 155 156 EXPECT_EQ(i, 12); 157 158 // Manually terminate the pipeline 159 iter->Stop(); 160 } 161 162 TEST_F(MindDataTestCacheOp, DISABLED_TestCacheMnistCApi) { 163 session_id_type env_session; 164 Status s = GetSessionFromEnv(&env_session); 165 EXPECT_EQ(s, Status::OK()); 166 167 std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false); 168 EXPECT_NE(some_cache, nullptr); 169 170 // Create a Mnist Dataset 171 std::string folder_path = datasets_root_path_ + "/testMnistData/"; 172 std::shared_ptr<Dataset> ds = Mnist(folder_path, "all", std::make_shared<RandomSampler>(false, 10), some_cache); 173 EXPECT_NE(ds, nullptr); 174 175 // Create a Repeat operation on ds 176 int32_t repeat_num = 2; 177 ds = ds->Repeat(repeat_num); 178 EXPECT_NE(ds, nullptr); 179 180 // Create an iterator over the result of the above dataset 181 // This will trigger the creation of the Execution Tree and launch it. 182 std::shared_ptr<Iterator> iter = ds->CreateIterator(); 183 EXPECT_NE(iter, nullptr); 184 185 // Iterate the dataset and get each row 186 std::unordered_map<std::string, mindspore::MSTensor> row; 187 ASSERT_OK(iter->GetNextRow(&row)); 188 189 uint64_t i = 0; 190 while (row.size() != 0) { 191 i++; 192 auto image = row["image"]; 193 MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); 194 ASSERT_OK(iter->GetNextRow(&row)); 195 } 196 197 EXPECT_EQ(i, 20); 198 199 // Manually terminate the pipeline 200 iter->Stop(); 201 } 202 203 TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCelebaCApi) { 204 session_id_type env_session; 205 Status s = GetSessionFromEnv(&env_session); 206 EXPECT_EQ(s, Status::OK()); 207 208 std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false); 209 EXPECT_NE(some_cache, nullptr); 210 211 // Create a CelebA Dataset, this folder_path has 4 records in it 212 std::string folder_path = datasets_root_path_ + "/testCelebAData/"; 213 std::shared_ptr<Dataset> ds = 214 CelebA(folder_path, "all", std::make_shared<RandomSampler>(false, 10), false, {}, some_cache); 215 EXPECT_NE(ds, nullptr); 216 217 // Create a Repeat operation on ds 218 int32_t repeat_num = 2; 219 ds = ds->Repeat(repeat_num); 220 EXPECT_NE(ds, nullptr); 221 222 // Create an iterator over the result of the above dataset 223 // This will trigger the creation of the Execution Tree and launch it. 224 std::shared_ptr<Iterator> iter = ds->CreateIterator(); 225 EXPECT_NE(iter, nullptr); 226 227 // Iterate the dataset and get each row 228 std::unordered_map<std::string, mindspore::MSTensor> row; 229 ASSERT_OK(iter->GetNextRow(&row)); 230 231 uint64_t i = 0; 232 while (row.size() != 0) { 233 i++; 234 auto image = row["image"]; 235 MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); 236 ASSERT_OK(iter->GetNextRow(&row)); 237 } 238 239 EXPECT_EQ(i, 8); 240 241 // Manually terminate the pipeline 242 iter->Stop(); 243 } 244 245 TEST_F(MindDataTestCacheOp, DISABLED_TestCacheManifestCApi) { 246 session_id_type env_session; 247 Status s = GetSessionFromEnv(&env_session); 248 EXPECT_EQ(s, Status::OK()); 249 250 std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false); 251 EXPECT_NE(some_cache, nullptr); 252 253 // Create a Manifest Dataset, this file_path has 2 records in it 254 std::string file_path = datasets_root_path_ + "/testManifestData/cpp.json"; 255 std::shared_ptr<Dataset> ds = Manifest(file_path, "train", std::make_shared<RandomSampler>(), {}, false, some_cache); 256 EXPECT_NE(ds, nullptr); 257 258 // Create a Repeat operation on ds 259 int32_t repeat_num = 2; 260 ds = ds->Repeat(repeat_num); 261 EXPECT_NE(ds, nullptr); 262 263 // Create an iterator over the result of the above dataset 264 // This will trigger the creation of the Execution Tree and launch it. 265 std::shared_ptr<Iterator> iter = ds->CreateIterator(); 266 EXPECT_NE(iter, nullptr); 267 268 // Iterate the dataset and get each row 269 std::unordered_map<std::string, mindspore::MSTensor> row; 270 ASSERT_OK(iter->GetNextRow(&row)); 271 272 uint64_t i = 0; 273 while (row.size() != 0) { 274 i++; 275 auto image = row["image"]; 276 MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); 277 ASSERT_OK(iter->GetNextRow(&row)); 278 } 279 280 EXPECT_EQ(i, 4); 281 282 // Manually terminate the pipeline 283 iter->Stop(); 284 } 285 286 TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCifar10CApi) { 287 session_id_type env_session; 288 Status s = GetSessionFromEnv(&env_session); 289 EXPECT_EQ(s, Status::OK()); 290 291 std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false); 292 EXPECT_NE(some_cache, nullptr); 293 294 // Create a Cifar10 Dataset 295 std::string folder_path = datasets_root_path_ + "/testCifar10Data/"; 296 std::shared_ptr<Dataset> ds = Cifar10(folder_path, "all", std::make_shared<RandomSampler>(false, 10), some_cache); 297 EXPECT_NE(ds, nullptr); 298 299 // Create a Repeat operation on ds 300 int32_t repeat_num = 2; 301 ds = ds->Repeat(repeat_num); 302 EXPECT_NE(ds, nullptr); 303 304 // Create an iterator over the result of the above dataset 305 // This will trigger the creation of the Execution Tree and launch it. 306 std::shared_ptr<Iterator> iter = ds->CreateIterator(); 307 EXPECT_NE(iter, nullptr); 308 309 // Iterate the dataset and get each row 310 std::unordered_map<std::string, mindspore::MSTensor> row; 311 ASSERT_OK(iter->GetNextRow(&row)); 312 313 uint64_t i = 0; 314 while (row.size() != 0) { 315 i++; 316 auto image = row["image"]; 317 MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); 318 ASSERT_OK(iter->GetNextRow(&row)); 319 } 320 321 EXPECT_EQ(i, 20); 322 323 // Manually terminate the pipeline 324 iter->Stop(); 325 } 326 327 TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCifar100CApi) { 328 session_id_type env_session; 329 Status s = GetSessionFromEnv(&env_session); 330 EXPECT_EQ(s, Status::OK()); 331 332 std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false); 333 EXPECT_NE(some_cache, nullptr); 334 335 // Create a Cifar100 Dataset 336 std::string folder_path = datasets_root_path_ + "/testCifar100Data/"; 337 std::shared_ptr<Dataset> ds = Cifar100(folder_path, "all", std::make_shared<RandomSampler>(false, 10), some_cache); 338 EXPECT_NE(ds, nullptr); 339 340 // Create a Repeat operation on ds 341 int32_t repeat_num = 2; 342 ds = ds->Repeat(repeat_num); 343 EXPECT_NE(ds, nullptr); 344 345 // Create an iterator over the result of the above dataset 346 // This will trigger the creation of the Execution Tree and launch it. 347 std::shared_ptr<Iterator> iter = ds->CreateIterator(); 348 EXPECT_NE(iter, nullptr); 349 350 // Iterate the dataset and get each row 351 std::unordered_map<std::string, mindspore::MSTensor> row; 352 ASSERT_OK(iter->GetNextRow(&row)); 353 354 uint64_t i = 0; 355 while (row.size() != 0) { 356 i++; 357 auto image = row["image"]; 358 MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); 359 ASSERT_OK(iter->GetNextRow(&row)); 360 } 361 362 EXPECT_EQ(i, 20); 363 364 // Manually terminate the pipeline 365 iter->Stop(); 366 } 367 368 TEST_F(MindDataTestCacheOp, DISABLED_TestCacheVocCApi) { 369 session_id_type env_session; 370 Status s = GetSessionFromEnv(&env_session); 371 EXPECT_EQ(s, Status::OK()); 372 373 std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false); 374 EXPECT_NE(some_cache, nullptr); 375 376 // Create a VOC Dataset, this folder_path has 9 records in it 377 std::string folder_path = datasets_root_path_ + "/testVOC2012/"; 378 std::shared_ptr<Dataset> ds = 379 VOC(folder_path, "Detection", "train", {}, false, std::make_shared<RandomSampler>(), some_cache); 380 EXPECT_NE(ds, nullptr); 381 382 // Create a Repeat operation on ds 383 int32_t repeat_num = 2; 384 ds = ds->Repeat(repeat_num); 385 EXPECT_NE(ds, nullptr); 386 387 // Create an iterator over the result of the above dataset 388 // This will trigger the creation of the Execution Tree and launch it. 389 std::shared_ptr<Iterator> iter = ds->CreateIterator(); 390 EXPECT_NE(iter, nullptr); 391 392 // Iterate the dataset and get each row 393 std::unordered_map<std::string, mindspore::MSTensor> row; 394 ASSERT_OK(iter->GetNextRow(&row)); 395 396 uint64_t i = 0; 397 while (row.size() != 0) { 398 i++; 399 auto image = row["image"]; 400 MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); 401 ASSERT_OK(iter->GetNextRow(&row)); 402 } 403 404 EXPECT_EQ(i, 18); 405 406 // Manually terminate the pipeline 407 iter->Stop(); 408 } 409 410 TEST_F(MindDataTestCacheOp, DISABLED_TestCacheAlbumCApi) { 411 session_id_type env_session; 412 Status s = GetSessionFromEnv(&env_session); 413 EXPECT_EQ(s, Status::OK()); 414 415 std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false); 416 EXPECT_NE(some_cache, nullptr); 417 418 std::string folder_path = datasets_root_path_ + "/testAlbum/images"; 419 std::string schema_file = datasets_root_path_ + "/testAlbum/datasetSchema.json"; 420 std::vector<std::string> column_names = {"image", "label", "id"}; 421 // Create a Album Dataset, 7 records in it 422 std::shared_ptr<Dataset> ds = 423 Album(folder_path, schema_file, column_names, false, std::make_shared<RandomSampler>(), some_cache); 424 EXPECT_NE(ds, nullptr); 425 426 // Create a Repeat operation on ds 427 int32_t repeat_num = 2; 428 ds = ds->Repeat(repeat_num); 429 EXPECT_NE(ds, nullptr); 430 431 // Create an iterator over the result of the above dataset 432 // This will trigger the creation of the Execution Tree and launch it. 433 std::shared_ptr<Iterator> iter = ds->CreateIterator(); 434 EXPECT_NE(iter, nullptr); 435 436 // Iterate the dataset and get each row 437 std::unordered_map<std::string, mindspore::MSTensor> row; 438 ASSERT_OK(iter->GetNextRow(&row)); 439 440 uint64_t i = 0; 441 while (row.size() != 0) { 442 i++; 443 ASSERT_OK(iter->GetNextRow(&row)); 444 } 445 446 EXPECT_EQ(i, 14); 447 448 // Manually terminate the pipeline 449 iter->Stop(); 450 } 451 452 TEST_F(MindDataTestCacheOp, DISABLED_TestCacheMindRecordCApi) { 453 session_id_type env_session; 454 Status s = GetSessionFromEnv(&env_session); 455 EXPECT_EQ(s, Status::OK()); 456 457 std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false); 458 EXPECT_NE(some_cache, nullptr); 459 460 // Create a MindData Dataset 461 // Pass one mindrecord shard file to parse dataset info, and search for other mindrecord files with same dataset info, 462 // thus all records in imagenet.mindrecord0 ~ imagenet.mindrecord3 will be read 463 std::string file_path = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0"; 464 465 // Create a MindRecord Dataset, 20 records in it 466 std::shared_ptr<Dataset> ds = MindData(file_path, {}, std::make_shared<RandomSampler>(), nullptr, 0, 467 ShuffleMode::kGlobal, some_cache); 468 EXPECT_NE(ds, nullptr); 469 470 // Create an iterator over the result of the above dataset 471 // This will trigger the creation of the Execution Tree and launch it. 472 std::shared_ptr<Iterator> iter = ds->CreateIterator(); 473 EXPECT_NE(iter, nullptr); 474 475 // Iterate the dataset and get each row 476 std::unordered_map<std::string, mindspore::MSTensor> row; 477 ASSERT_OK(iter->GetNextRow(&row)); 478 479 uint64_t i = 0; 480 while (row.size() != 0) { 481 i++; 482 ASSERT_OK(iter->GetNextRow(&row)); 483 } 484 485 EXPECT_EQ(i, 20); 486 487 // Manually terminate the pipeline 488 iter->Stop(); 489 } 490 491 TEST_F(MindDataTestCacheOp, DISABLED_TestCacheRandomDataCApi) { 492 session_id_type env_session; 493 Status s = GetSessionFromEnv(&env_session); 494 EXPECT_EQ(s, Status::OK()); 495 496 std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false); 497 EXPECT_NE(some_cache, nullptr); 498 499 // Create a RandomDataset 500 std::shared_ptr<SchemaObj> schema = Schema(); 501 502 ASSERT_OK(schema->add_column("image", mindspore::DataType::kNumberTypeUInt8, {2})); 503 ASSERT_OK(schema->add_column("label", mindspore::DataType::kNumberTypeUInt8, {1})); 504 std::shared_ptr<Dataset> ds = RandomData(8, schema, {}, some_cache); 505 EXPECT_NE(ds, nullptr); 506 507 // Create a Repeat operation on ds 508 int32_t repeat_num = 2; 509 ds = ds->Repeat(repeat_num); 510 EXPECT_NE(ds, nullptr); 511 512 // Create an iterator over the result of the above dataset 513 // This will trigger the creation of the Execution Tree and launch it. 514 std::shared_ptr<Iterator> iter = ds->CreateIterator(); 515 EXPECT_NE(iter, nullptr); 516 517 // Iterate the dataset and get each row 518 std::unordered_map<std::string, mindspore::MSTensor> row; 519 ASSERT_OK(iter->GetNextRow(&row)); 520 521 uint64_t i = 0; 522 while (row.size() != 0) { 523 i++; 524 ASSERT_OK(iter->GetNextRow(&row)); 525 } 526 527 EXPECT_EQ(i, 16); 528 529 // Manually terminate the pipeline 530 iter->Stop(); 531 } 532 533 TEST_F(MindDataTestCacheOp, DISABLED_TestCacheTFRecordCApi1) { 534 session_id_type env_session; 535 Status s = GetSessionFromEnv(&env_session); 536 EXPECT_EQ(s, Status::OK()); 537 538 std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false); 539 EXPECT_NE(some_cache, nullptr); 540 541 // Create a TFRecord Dataset, this file_path has 3 records in it 542 std::string file_path = datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0001.data"; 543 std::string schema_path = datasets_root_path_ + "/test_tf_file_3_images2/datasetSchema.json"; 544 std::shared_ptr<Dataset> ds = 545 TFRecord({file_path}, schema_path, {"image"}, 0, ShuffleMode::kFalse, 1, 0, false, some_cache); 546 EXPECT_NE(ds, nullptr); 547 548 // Create a Repeat operation on ds 549 int32_t repeat_num = 2; 550 ds = ds->Repeat(repeat_num); 551 EXPECT_NE(ds, nullptr); 552 553 // Create an iterator over the result of the above dataset 554 // This will trigger the creation of the Execution Tree and launch it. 555 std::shared_ptr<Iterator> iter = ds->CreateIterator(); 556 EXPECT_NE(iter, nullptr); 557 558 // Iterate the dataset and get each row 559 std::unordered_map<std::string, mindspore::MSTensor> row; 560 ASSERT_OK(iter->GetNextRow(&row)); 561 562 uint64_t i = 0; 563 while (row.size() != 0) { 564 i++; 565 auto image = row["image"]; 566 MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); 567 ASSERT_OK(iter->GetNextRow(&row)); 568 } 569 570 EXPECT_EQ(i, 6); 571 572 // Manually terminate the pipeline 573 iter->Stop(); 574 } 575 576 TEST_F(MindDataTestCacheOp, DISABLED_TestCacheTFRecordCApi2) { 577 session_id_type env_session; 578 Status s = GetSessionFromEnv(&env_session); 579 EXPECT_EQ(s, Status::OK()); 580 581 std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false); 582 EXPECT_NE(some_cache, nullptr); 583 584 // Create a TFRecord Dataset, this file_path has 3 records in it 585 std::string file_path = datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0001.data"; 586 std::string schema_path = datasets_root_path_ + "/test_tf_file_3_images2/datasetSchema.json"; 587 588 // In this one, the TFRecord dataset will be given sharding configuration, however since a cache is 589 // used, the tree prepare should undo the sharding configuration and instead, a distributed 590 // sampler will be chosen with the same shard config. 591 // With only 3 records shard into 3, we expect only 1 record returned for this shard 592 // However, the sharding will be done by the sampler, not by the TFRecord leaf node 593 // In this case, it is a row-based sharding, not the file-based sharding that would happen if 594 // there was not any cache. 595 std::shared_ptr<Dataset> ds = 596 TFRecord({file_path}, schema_path, {"image"}, 0, ShuffleMode::kFalse, 3, 0, false, some_cache); 597 EXPECT_NE(ds, nullptr); 598 599 // Create a Repeat operation on ds 600 int32_t repeat_num = 2; 601 ds = ds->Repeat(repeat_num); 602 EXPECT_NE(ds, nullptr); 603 604 // Create an iterator over the result of the above dataset 605 // This will trigger the creation of the Execution Tree and launch it. 606 std::shared_ptr<Iterator> iter = ds->CreateIterator(); 607 EXPECT_NE(iter, nullptr); 608 609 // Iterate the dataset and get each row 610 std::unordered_map<std::string, mindspore::MSTensor> row; 611 ASSERT_OK(iter->GetNextRow(&row)); 612 613 uint64_t i = 0; 614 while (row.size() != 0) { 615 i++; 616 auto image = row["image"]; 617 MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); 618 ASSERT_OK(iter->GetNextRow(&row)); 619 } 620 621 EXPECT_EQ(i, 2); 622 623 // Manually terminate the pipeline 624 iter->Stop(); 625 } 626 627 TEST_F(MindDataTestCacheOp, DISABLED_TestCacheTFRecordCApi3) { 628 session_id_type env_session; 629 Status s = GetSessionFromEnv(&env_session); 630 EXPECT_EQ(s, Status::OK()); 631 632 std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false); 633 EXPECT_NE(some_cache, nullptr); 634 635 // Create a TFRecord Dataset, this file_path has 3 records in it 636 std::string file_path = datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0001.data"; 637 std::string schema_path = datasets_root_path_ + "/test_tf_file_3_images2/datasetSchema.json"; 638 639 // In this one, a num_samples argument is given. 640 // In this case, a sequential sampler would be chosen with the same num_samples argument. 641 // The samples will be selected by the sequential sampler, not by the TFRecord leaf node. 642 std::shared_ptr<Dataset> ds = 643 TFRecord({file_path}, schema_path, {"image"}, 2, ShuffleMode::kFalse, 1, 0, false, some_cache); 644 EXPECT_NE(ds, nullptr); 645 646 // Create a Repeat operation on ds 647 int32_t repeat_num = 2; 648 ds = ds->Repeat(repeat_num); 649 EXPECT_NE(ds, nullptr); 650 651 // Create an iterator over the result of the above dataset 652 // This will trigger the creation of the Execution Tree and launch it. 653 std::shared_ptr<Iterator> iter = ds->CreateIterator(); 654 EXPECT_NE(iter, nullptr); 655 656 // Iterate the dataset and get each row 657 std::unordered_map<std::string, mindspore::MSTensor> row; 658 ASSERT_OK(iter->GetNextRow(&row)); 659 660 uint64_t i = 0; 661 while (row.size() != 0) { 662 i++; 663 auto image = row["image"]; 664 MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); 665 ASSERT_OK(iter->GetNextRow(&row)); 666 } 667 668 EXPECT_EQ(i, 4); 669 670 // Manually terminate the pipeline 671 iter->Stop(); 672 } 673 674 TEST_F(MindDataTestCacheOp, DISABLED_TestCacheTextfileCApi) { 675 session_id_type env_session; 676 Status s = GetSessionFromEnv(&env_session); 677 EXPECT_EQ(s, Status::OK()); 678 679 std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false); 680 EXPECT_NE(some_cache, nullptr); 681 682 // Create a TextFile Dataset, this file_path has 3 records in it 683 std::string file_path = datasets_root_path_ + "/testTextFileDataset/1.txt"; 684 685 // In this one, a num_samples=2 argument is given. 686 // In this case, a sequential sampler would be chosen with the same num_samples argument. 687 // The samples will be selected by the sequential sampler, not by the TextFile leaf node. 688 std::shared_ptr<Dataset> ds = TextFile({file_path}, 2, ShuffleMode::kGlobal, 1, 0, some_cache); 689 EXPECT_NE(ds, nullptr); 690 691 // Create a Repeat operation on ds 692 int32_t repeat_num = 2; 693 ds = ds->Repeat(repeat_num); 694 EXPECT_NE(ds, nullptr); 695 696 // Create an iterator over the result of the above dataset 697 // This will trigger the creation of the Execution Tree and launch it. 698 std::shared_ptr<Iterator> iter = ds->CreateIterator(); 699 EXPECT_NE(iter, nullptr); 700 701 // Iterate the dataset and get each row 702 std::unordered_map<std::string, mindspore::MSTensor> row; 703 ASSERT_OK(iter->GetNextRow(&row)); 704 705 uint64_t i = 0; 706 while (row.size() != 0) { 707 i++; 708 ASSERT_OK(iter->GetNextRow(&row)); 709 } 710 711 EXPECT_EQ(i, 4); 712 713 // Manually terminate the pipeline 714 iter->Stop(); 715 } 716 717 TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCsvCApi) { 718 session_id_type env_session; 719 Status s = GetSessionFromEnv(&env_session); 720 EXPECT_EQ(s, Status::OK()); 721 722 std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false); 723 EXPECT_NE(some_cache, nullptr); 724 725 // Create a CSV Dataset, this file_path has 3 records in it 726 std::string file_path = datasets_root_path_ + "/testCSV/1.csv"; 727 std::vector<std::string> column_names = {"col1", "col2", "col3", "col4"}; 728 729 // In this one, a num_samples=2 argument is given. 730 // In this case, a sequential sampler would be chosen with the same num_samples argument. 731 // The samples will be selected by the sequential sampler, not by the CSV leaf node. 732 std::shared_ptr<Dataset> ds = CSV({file_path}, ',', {}, column_names, 2, ShuffleMode::kFalse, 1, 0, some_cache); 733 EXPECT_NE(ds, nullptr); 734 735 // Create a Repeat operation on ds 736 int32_t repeat_num = 2; 737 ds = ds->Repeat(repeat_num); 738 EXPECT_NE(ds, nullptr); 739 740 // Create an iterator over the result of the above dataset 741 // This will trigger the creation of the Execution Tree and launch it. 742 std::shared_ptr<Iterator> iter = ds->CreateIterator(); 743 EXPECT_NE(iter, nullptr); 744 745 // Iterate the dataset and get each row 746 std::unordered_map<std::string, mindspore::MSTensor> row; 747 ASSERT_OK(iter->GetNextRow(&row)); 748 749 uint64_t i = 0; 750 while (row.size() != 0) { 751 i++; 752 ASSERT_OK(iter->GetNextRow(&row)); 753 } 754 755 EXPECT_EQ(i, 4); 756 757 // Manually terminate the pipeline 758 iter->Stop(); 759 } 760 761 TEST_F(MindDataTestCacheOp, DISABLED_TestCacheClueCApi) { 762 session_id_type env_session; 763 Status s = GetSessionFromEnv(&env_session); 764 EXPECT_EQ(s, Status::OK()); 765 766 std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false); 767 EXPECT_NE(some_cache, nullptr); 768 769 // Create a CLUE Dataset, this file_path has 3 records in it 770 std::string file_path = datasets_root_path_ + "/testCLUE/afqmc/train.json"; 771 std::string task = "AFQMC"; 772 std::string usage = "train"; 773 774 // In this one, a num_samples=2 argument is given. 775 // In this case, a sequential sampler would be chosen with the same num_samples argument. 776 // The samples will be selected by the sequential sampler, not by the CLUE leaf node. 777 std::shared_ptr<Dataset> ds = CLUE({file_path}, task, usage, 2, ShuffleMode::kFalse, 1, 0, some_cache); 778 EXPECT_NE(ds, nullptr); 779 780 // Create a Repeat operation on ds 781 int32_t repeat_num = 2; 782 ds = ds->Repeat(repeat_num); 783 EXPECT_NE(ds, nullptr); 784 785 // Create an iterator over the result of the above dataset 786 // This will trigger the creation of the Execution Tree and launch it. 787 std::shared_ptr<Iterator> iter = ds->CreateIterator(); 788 EXPECT_NE(iter, nullptr); 789 790 // Iterate the dataset and get each row 791 std::unordered_map<std::string, mindspore::MSTensor> row; 792 ASSERT_OK(iter->GetNextRow(&row)); 793 794 uint64_t i = 0; 795 while (row.size() != 0) { 796 i++; 797 ASSERT_OK(iter->GetNextRow(&row)); 798 } 799 800 EXPECT_EQ(i, 4); 801 802 // Manually terminate the pipeline 803 iter->Stop(); 804 } 805 806 TEST_F(MindDataTestCacheOp, DISABLED_TestCApiCacheShare1) { 807 session_id_type env_session; 808 Status s = GetSessionFromEnv(&env_session); 809 EXPECT_EQ(s, Status::OK()); 810 811 std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false); 812 EXPECT_NE(some_cache, nullptr); 813 814 // Create an ImageFolder Dataset, this folder_path only has 2 images in it 815 std::string folder_path = datasets_root_path_ + "/testImageNetData/train/"; 816 std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, false, std::make_shared<RandomSampler>(), {}, {}, some_cache); 817 EXPECT_NE(ds1, nullptr); 818 std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, false, std::make_shared<RandomSampler>(), {}, {}, some_cache); 819 EXPECT_NE(ds2, nullptr); 820 821 // Create and launch the Execution Tree for ds1 822 std::shared_ptr<Iterator> iter1 = ds1->CreateIterator(); 823 EXPECT_NE(iter1, nullptr); 824 // Iterate the dataset and get each row 825 std::unordered_map<std::string, mindspore::MSTensor> row; 826 ASSERT_OK(iter1->GetNextRow(&row)); 827 828 uint64_t i = 0; 829 while (row.size() != 0) { 830 i++; 831 auto image = row["image"]; 832 MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); 833 ASSERT_OK(iter1->GetNextRow(&row)); 834 } 835 EXPECT_EQ(i, 2); 836 // Manually terminate the pipeline 837 iter1->Stop(); 838 839 // Create and launch the Execution Tree for ds2 840 std::shared_ptr<Iterator> iter2 = ds2->CreateIterator(); 841 EXPECT_NE(iter2, nullptr); 842 // Iterate the dataset and get each row 843 ASSERT_OK(iter2->GetNextRow(&row)); 844 845 i = 0; 846 while (row.size() != 0) { 847 i++; 848 auto image = row["image"]; 849 MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); 850 ASSERT_OK(iter2->GetNextRow(&row)); 851 } 852 EXPECT_EQ(i, 2); 853 854 // Manually terminate the pipeline 855 iter2->Stop(); 856 } 857 858 TEST_F(MindDataTestCacheOp, DISABLED_TestCApiCacheShare2) { 859 session_id_type env_session; 860 Status s = GetSessionFromEnv(&env_session); 861 EXPECT_EQ(s, Status::OK()); 862 863 std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false); 864 EXPECT_NE(some_cache, nullptr); 865 866 // Create an ImageFolder Dataset, this folder_path only has 2 images in it 867 std::string folder_path = datasets_root_path_ + "/testImageNetData/train/"; 868 // The first pipeline is ImageFolder with RandomSampler, the second pipeline is ImageFolder with SequentialSampler 869 // Since sampler does not influence the data in the source, these two pipelines can share a common cache. 870 std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(), {}, {}, some_cache); 871 EXPECT_NE(ds1, nullptr); 872 std::shared_ptr<Dataset> ds2 = 873 ImageFolder(folder_path, true, std::make_shared<SequentialSampler>(), {}, {}, some_cache); 874 EXPECT_NE(ds2, nullptr); 875 876 // Create and launch the Execution Tree for ds1 877 std::shared_ptr<Iterator> iter1 = ds1->CreateIterator(); 878 EXPECT_NE(iter1, nullptr); 879 // Iterate the dataset and get each row 880 std::unordered_map<std::string, mindspore::MSTensor> row; 881 ASSERT_OK(iter1->GetNextRow(&row)); 882 883 uint64_t i = 0; 884 while (row.size() != 0) { 885 i++; 886 auto image = row["image"]; 887 ASSERT_OK(iter1->GetNextRow(&row)); 888 } 889 EXPECT_EQ(i, 2); 890 // Manually terminate the pipeline 891 iter1->Stop(); 892 893 // Create and launch the Execution Tree for ds2 894 std::shared_ptr<Iterator> iter2 = ds2->CreateIterator(); 895 EXPECT_NE(iter2, nullptr); 896 // Iterate the dataset and get each row 897 ASSERT_OK(iter2->GetNextRow(&row)); 898 899 i = 0; 900 while (row.size() != 0) { 901 i++; 902 auto image = row["image"]; 903 ASSERT_OK(iter2->GetNextRow(&row)); 904 } 905 EXPECT_EQ(i, 2); 906 907 // Manually terminate the pipeline 908 iter2->Stop(); 909 } 910 911 TEST_F(MindDataTestCacheOp, DISABLED_TestCApiCacheShareFailure1) { 912 session_id_type env_session; 913 Status s = GetSessionFromEnv(&env_session); 914 EXPECT_EQ(s, Status::OK()); 915 916 std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false); 917 EXPECT_NE(some_cache, nullptr); 918 919 // Create an ImageFolder Dataset, this folder_path only has 2 images in it 920 std::string folder_path = datasets_root_path_ + "/testImageNetData/train/"; 921 std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(), {}, {}, some_cache); 922 EXPECT_NE(ds1, nullptr); 923 std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, false, std::make_shared<RandomSampler>(), {}, {}, some_cache); 924 EXPECT_NE(ds2, nullptr); 925 926 // Create and launch the Execution Tree for ds1 927 std::shared_ptr<Iterator> iter1 = ds1->CreateIterator(); 928 EXPECT_NE(iter1, nullptr); 929 // Iterate the dataset and get each row 930 std::unordered_map<std::string, mindspore::MSTensor> row; 931 ASSERT_OK(iter1->GetNextRow(&row)); 932 933 uint64_t i = 0; 934 while (row.size() != 0) { 935 i++; 936 auto image = row["image"]; 937 ASSERT_OK(iter1->GetNextRow(&row)); 938 } 939 EXPECT_EQ(i, 2); 940 // Manually terminate the pipeline 941 iter1->Stop(); 942 943 // Re-use a cache for the second pipeline would fail 944 std::shared_ptr<Iterator> iter2 = ds2->CreateIterator(); 945 EXPECT_EQ(iter2, nullptr); 946 } 947