1 /** 2 * Copyright 2019-2021 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include <iostream> 17 #include <memory> 18 #include <string> 19 #include "common/common.h" 20 #include "utils/ms_utils.h" 21 #include "minddata/dataset/core/client.h" 22 #include "minddata/dataset/core/global_context.h" 23 #include "minddata/dataset/engine/datasetops/source/image_folder_op.h" 24 #include "minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.h" 25 #include "minddata/dataset/engine/datasetops/source/sampler/pk_sampler.h" 26 #include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h" 27 #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" 28 #include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" 29 #include "minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h" 30 #include "minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" 31 #include "minddata/dataset/util/status.h" 32 #include "gtest/gtest.h" 33 #include "utils/log_adapter.h" 34 #include "securec.h" 35 36 namespace common = mindspore::common; 37 38 using namespace mindspore::dataset; 39 using mindspore::LogStream; 40 using mindspore::ExceptionType::NoExceptionType; 41 using mindspore::MsLogLevel::ERROR; 42 43 // std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false); 44 45 // std::shared_ptr<RepeatOp> Repeat(int repeat_cnt); 46 47 // std::shared_ptr<ExecutionTree> Build(std::vector<std::shared_ptr<DatasetOp>> ops); 48 49 std::shared_ptr<ImageFolderOp> ImageFolder(int64_t num_works, int64_t rows, int64_t conns, std::string path, 50 bool shuf = false, std::shared_ptr<SamplerRT> sampler = nullptr, 51 std::map<std::string, int32_t> map = {}, bool decode = false) { 52 std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>(); 53 TensorShape scalar = TensorShape::CreateScalar(); 54 (void)schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1)); 55 (void)schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_INT32), TensorImpl::kFlexible, 0, &scalar)); 56 std::set<std::string> ext = {".jpg", ".JPEG"}; 57 if (sampler == nullptr) { 58 int64_t num_samples = 0; // default num samples of 0 means to sample entire set of data 59 int64_t start_index = 0; 60 sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples); 61 } 62 std::shared_ptr<ImageFolderOp> so = 63 std::make_shared<ImageFolderOp>(num_works, path, conns, false, decode, ext, map, std::move(schema), sampler); 64 return so; 65 } 66 67 Status Create1DTensor(std::shared_ptr<Tensor> *sample_ids, int64_t num_elements, unsigned char *data = nullptr, 68 DataType::Type data_type = DataType::DE_UINT32) { 69 TensorShape shape(std::vector<int64_t>(1, num_elements)); 70 RETURN_IF_NOT_OK(Tensor::CreateFromMemory(shape, DataType(data_type), data, sample_ids)); 71 72 return Status::OK(); 73 } 74 75 class MindDataTestImageFolderSampler : public UT::DatasetOpTesting { 76 protected: 77 }; 78 79 TEST_F(MindDataTestImageFolderSampler, TestSequentialImageFolderWithRepeat) { 80 std::string folder_path = datasets_root_path_ + "/testPK/data"; 81 auto op1 = ImageFolder(16, 2, 32, folder_path, false); 82 auto op2 = Repeat(2); 83 op1->SetTotalRepeats(2); 84 op1->SetNumRepeatsPerEpoch(2); 85 auto tree = Build({op1, op2}); 86 tree->Prepare(); 87 int32_t res[] = {0, 1, 2, 3}; 88 Status rc = tree->Launch(); 89 if (rc.IsError()) { 90 MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << "."; 91 EXPECT_TRUE(false); 92 } else { 93 DatasetIterator di(tree); 94 TensorMap tensor_map; 95 ASSERT_OK(di.GetNextAsMap(&tensor_map)); 96 EXPECT_TRUE(rc.IsOk()); 97 uint64_t i = 0; 98 int32_t label = 0; 99 while (tensor_map.size() != 0) { 100 tensor_map["label"]->GetItemAt<int32_t>(&label, {}); 101 EXPECT_TRUE(res[(i % 44) / 11] == label); 102 MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n"; 103 i++; 104 ASSERT_OK(di.GetNextAsMap(&tensor_map)); 105 } 106 EXPECT_TRUE(i == 88); 107 } 108 } 109 110 TEST_F(MindDataTestImageFolderSampler, TestRandomImageFolder) { 111 std::string folder_path = datasets_root_path_ + "/testPK/data"; 112 auto tree = Build({ImageFolder(16, 2, 32, folder_path, true, nullptr)}); 113 tree->Prepare(); 114 Status rc = tree->Launch(); 115 if (rc.IsError()) { 116 MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << "."; 117 EXPECT_TRUE(false); 118 } else { 119 DatasetIterator di(tree); 120 TensorMap tensor_map; 121 ASSERT_OK(di.GetNextAsMap(&tensor_map)); 122 EXPECT_TRUE(rc.IsOk()); 123 uint64_t i = 0; 124 int32_t label = 0; 125 while (tensor_map.size() != 0) { 126 tensor_map["label"]->GetItemAt<int32_t>(&label, {}); 127 MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n"; 128 i++; 129 ASSERT_OK(di.GetNextAsMap(&tensor_map)); 130 } 131 EXPECT_TRUE(i == 44); 132 } 133 } 134 135 TEST_F(MindDataTestImageFolderSampler, TestRandomSamplerImageFolder) { 136 int32_t original_seed = GlobalContext::config_manager()->seed(); 137 GlobalContext::config_manager()->set_seed(0); 138 int64_t num_samples = 12; 139 std::shared_ptr<SamplerRT> sampler = std::make_unique<RandomSamplerRT>(true, num_samples, true); 140 int32_t res[] = {2, 2, 2, 3, 2, 3, 2, 3, 1, 2, 2, 1}; // ground truth label 141 std::string folder_path = datasets_root_path_ + "/testPK/data"; 142 auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(sampler))}); 143 tree->Prepare(); 144 Status rc = tree->Launch(); 145 if (rc.IsError()) { 146 MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << "."; 147 EXPECT_TRUE(false); 148 } else { 149 DatasetIterator di(tree); 150 TensorMap tensor_map; 151 ASSERT_OK(di.GetNextAsMap(&tensor_map)); 152 EXPECT_TRUE(rc.IsOk()); 153 uint64_t i = 0; 154 int32_t label = 0; 155 while (tensor_map.size() != 0) { 156 tensor_map["label"]->GetItemAt<int32_t>(&label, {}); 157 EXPECT_TRUE(res[i] == label); 158 MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n"; 159 i++; 160 ASSERT_OK(di.GetNextAsMap(&tensor_map)); 161 } 162 EXPECT_TRUE(i == 12); 163 } 164 GlobalContext::config_manager()->set_seed(original_seed); 165 } 166 167 TEST_F(MindDataTestImageFolderSampler, TestSequentialImageFolderWithRepeatBatch) { 168 std::string folder_path = datasets_root_path_ + "/testPK/data"; 169 auto op1 = ImageFolder(16, 2, 32, folder_path, false); 170 auto op2 = Repeat(2); 171 auto op3 = Batch(11); 172 op1->SetTotalRepeats(2); 173 op1->SetNumRepeatsPerEpoch(2); 174 auto tree = Build({op1, op2, op3}); 175 tree->Prepare(); 176 int32_t res[4][11] = {{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 177 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 178 {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}, 179 {3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3}}; 180 Status rc = tree->Launch(); 181 if (rc.IsError()) { 182 MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << "."; 183 EXPECT_TRUE(false); 184 } else { 185 DatasetIterator di(tree); 186 TensorMap tensor_map; 187 ASSERT_OK(di.GetNextAsMap(&tensor_map)); 188 EXPECT_TRUE(rc.IsOk()); 189 uint64_t i = 0; 190 while (tensor_map.size() != 0) { 191 std::shared_ptr<Tensor> label; 192 Create1DTensor(&label, 11, reinterpret_cast<unsigned char *>(res[i % 4]), DataType::DE_INT32); 193 EXPECT_TRUE((*label) == (*tensor_map["label"])); 194 MS_LOG(DEBUG) << "row: " << i << " " << tensor_map["image"]->shape() << " (*label):" << (*label) 195 << " *tensor_map[label]: " << *tensor_map["label"] << std::endl; 196 i++; 197 ASSERT_OK(di.GetNextAsMap(&tensor_map)); 198 } 199 EXPECT_TRUE(i == 8); 200 } 201 } 202 203 TEST_F(MindDataTestImageFolderSampler, TestSubsetRandomSamplerImageFolder) { 204 // id range 0 - 10 is label 0, and id range 11 - 21 is label 1 205 std::vector<int64_t> indices({0, 1, 2, 3, 4, 5, 12, 13, 14, 15, 16, 11}); 206 int64_t num_samples = 0; 207 std::shared_ptr<SamplerRT> sampler = std::make_shared<SubsetRandomSamplerRT>(indices, num_samples); 208 std::string folder_path = datasets_root_path_ + "/testPK/data"; 209 // Expect 6 samples for label 0 and 1 210 int res[2] = {6, 6}; 211 auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(sampler))}); 212 tree->Prepare(); 213 Status rc = tree->Launch(); 214 if (rc.IsError()) { 215 MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << "."; 216 EXPECT_TRUE(false); 217 } else { 218 DatasetIterator di(tree); 219 TensorMap tensor_map; 220 rc = di.GetNextAsMap(&tensor_map); 221 EXPECT_TRUE(rc.IsOk()); 222 uint64_t i = 0; 223 int32_t label = 0; 224 while (tensor_map.size() != 0) { 225 tensor_map["label"]->GetItemAt<int32_t>(&label, {}); 226 res[label]--; 227 i++; 228 ASSERT_OK(di.GetNextAsMap(&tensor_map)); 229 } 230 EXPECT_EQ(res[0], 0); 231 EXPECT_EQ(res[1], 0); 232 EXPECT_TRUE(i == 12); 233 } 234 } 235 236 TEST_F(MindDataTestImageFolderSampler, TestWeightedRandomSamplerImageFolder) { 237 // num samples to draw. 238 int64_t num_samples = 12; 239 int64_t total_samples = 44; 240 int64_t samples_per_tensor = 10; 241 std::vector<double> weights(total_samples, std::rand() % 100); 242 243 // create sampler with replacement = replacement 244 std::shared_ptr<SamplerRT> sampler = 245 std::make_shared<WeightedRandomSamplerRT>(weights, num_samples, true, samples_per_tensor); 246 247 std::string folder_path = datasets_root_path_ + "/testPK/data"; 248 auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(sampler))}); 249 tree->Prepare(); 250 Status rc = tree->Launch(); 251 if (rc.IsError()) { 252 MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << "."; 253 EXPECT_TRUE(false); 254 } else { 255 DatasetIterator di(tree); 256 TensorMap tensor_map; 257 rc = di.GetNextAsMap(&tensor_map); 258 EXPECT_TRUE(rc.IsOk()); 259 uint64_t i = 0; 260 int32_t label = 0; 261 while (tensor_map.size() != 0) { 262 tensor_map["label"]->GetItemAt<int32_t>(&label, {}); 263 i++; 264 ASSERT_OK(di.GetNextAsMap(&tensor_map)); 265 } 266 EXPECT_TRUE(i == 12); 267 } 268 } 269 270 TEST_F(MindDataTestImageFolderSampler, TestImageFolderClassIndex) { 271 std::string folder_path = datasets_root_path_ + "/testPK/data"; 272 std::map<std::string, int32_t> map; 273 map["class3"] = 333; 274 map["class1"] = 111; 275 map["wrong folder name"] = 1234; // this is skipped 276 auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, nullptr, map)}); 277 int64_t res[2] = {111, 333}; 278 tree->Prepare(); 279 Status rc = tree->Launch(); 280 if (rc.IsError()) { 281 MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << "."; 282 EXPECT_TRUE(false); 283 } else { 284 DatasetIterator di(tree); 285 TensorMap tensor_map; 286 ASSERT_OK(di.GetNextAsMap(&tensor_map)); 287 EXPECT_TRUE(rc.IsOk()); 288 uint64_t i = 0; 289 int32_t label = 0; 290 while (tensor_map.size() != 0) { 291 tensor_map["label"]->GetItemAt<int32_t>(&label, {}); 292 EXPECT_TRUE(label == res[i / 11]); 293 MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n"; 294 i++; 295 ASSERT_OK(di.GetNextAsMap(&tensor_map)); 296 } 297 EXPECT_TRUE(i == 22); 298 } 299 } 300 301 TEST_F(MindDataTestImageFolderSampler, TestDistributedSampler) { 302 int64_t num_samples = 0; 303 std::shared_ptr<SamplerRT> sampler = std::make_shared<DistributedSamplerRT>(11, 10, false, num_samples); 304 std::string folder_path = datasets_root_path_ + "/testPK/data"; 305 auto op1 = ImageFolder(16, 2, 32, folder_path, false, std::move(sampler)); 306 auto op2 = Repeat(4); 307 op1->SetTotalRepeats(4); 308 op1->SetNumRepeatsPerEpoch(4); 309 auto tree = Build({op1, op2}); 310 tree->Prepare(); 311 Status rc = tree->Launch(); 312 if (rc.IsError()) { 313 MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << "."; 314 EXPECT_TRUE(false); 315 } else { 316 DatasetIterator di(tree); 317 TensorMap tensor_map; 318 rc = di.GetNextAsMap(&tensor_map); 319 EXPECT_TRUE(rc.IsOk()); 320 uint64_t i = 0; 321 int32_t label = 0; 322 while (tensor_map.size() != 0) { 323 tensor_map["label"]->GetItemAt<int32_t>(&label, {}); 324 EXPECT_EQ(i % 4, label); 325 MS_LOG(DEBUG) << "row:" << i << "\tlabel:" << label << "\n"; 326 i++; 327 ASSERT_OK(di.GetNextAsMap(&tensor_map)); 328 } 329 EXPECT_TRUE(i == 16); 330 } 331 } 332 333 TEST_F(MindDataTestImageFolderSampler, TestPKSamplerImageFolder) { 334 int64_t num_samples = 0; 335 std::shared_ptr<SamplerRT> sampler = std::make_shared<PKSamplerRT>(3, false, num_samples, 4); 336 int32_t res[] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3}; // ground truth label 337 std::string folder_path = datasets_root_path_ + "/testPK/data"; 338 auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(sampler))}); 339 tree->Prepare(); 340 Status rc = tree->Launch(); 341 if (rc.IsError()) { 342 MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << "."; 343 EXPECT_TRUE(false); 344 } else { 345 DatasetIterator di(tree); 346 TensorMap tensor_map; 347 ASSERT_OK(di.GetNextAsMap(&tensor_map)); 348 EXPECT_TRUE(rc.IsOk()); 349 uint64_t i = 0; 350 int32_t label = 0; 351 while (tensor_map.size() != 0) { 352 tensor_map["label"]->GetItemAt<int32_t>(&label, {}); 353 EXPECT_TRUE(res[i] == label); 354 MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n"; 355 i++; 356 ASSERT_OK(di.GetNextAsMap(&tensor_map)); 357 } 358 EXPECT_TRUE(i == 12); 359 } 360 } 361 362 TEST_F(MindDataTestImageFolderSampler, TestImageFolderDecode) { 363 std::string folder_path = datasets_root_path_ + "/testPK/data"; 364 std::map<std::string, int32_t> map; 365 map["class3"] = 333; 366 map["class1"] = 111; 367 map["wrong folder name"] = 1234; // this is skipped 368 int64_t num_samples = 20; 369 int64_t start_index = 0; 370 auto seq_sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples); 371 auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(seq_sampler), map, true)}); 372 int64_t res[2] = {111, 333}; 373 tree->Prepare(); 374 Status rc = tree->Launch(); 375 if (rc.IsError()) { 376 MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << "."; 377 EXPECT_TRUE(false); 378 } else { 379 DatasetIterator di(tree); 380 TensorMap tensor_map; 381 ASSERT_OK(di.GetNextAsMap(&tensor_map)); 382 EXPECT_TRUE(rc.IsOk()); 383 uint64_t i = 0; 384 int32_t label = 0; 385 while (tensor_map.size() != 0) { 386 tensor_map["label"]->GetItemAt<int32_t>(&label, {}); 387 EXPECT_TRUE(label == res[i / 11]); 388 EXPECT_TRUE(tensor_map["image"]->shape() == 389 TensorShape({2268, 4032, 3})); // verify shapes are correct after decode 390 MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n"; 391 i++; 392 ASSERT_OK(di.GetNextAsMap(&tensor_map)); 393 } 394 EXPECT_TRUE(i == 20); 395 } 396 } 397 398 TEST_F(MindDataTestImageFolderSampler, TestImageFolderSharding1) { 399 int64_t num_samples = 5; 400 std::shared_ptr<SamplerRT> sampler = std::make_shared<DistributedSamplerRT>(4, 0, false, num_samples); 401 std::string folder_path = datasets_root_path_ + "/testPK/data"; 402 // numWrks, rows, conns, path, shuffle, sampler, map, numSamples, decode 403 auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(sampler), {})}); 404 tree->Prepare(); 405 Status rc = tree->Launch(); 406 int32_t labels[5] = {0, 0, 0, 1, 1}; 407 if (rc.IsError()) { 408 MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << "."; 409 EXPECT_TRUE(false); 410 } else { 411 DatasetIterator di(tree); 412 TensorMap tensor_map; 413 rc = di.GetNextAsMap(&tensor_map); 414 EXPECT_TRUE(rc.IsOk()); 415 uint64_t i = 0; 416 int32_t label = 0; 417 while (tensor_map.size() != 0) { 418 tensor_map["label"]->GetItemAt<int32_t>(&label, {}); 419 EXPECT_EQ(labels[i], label); 420 MS_LOG(DEBUG) << "row:" << i << "\tlabel:" << label << "\n"; 421 i++; 422 ASSERT_OK(di.GetNextAsMap(&tensor_map)); 423 } 424 EXPECT_TRUE(i == 5); 425 } 426 } 427 428 TEST_F(MindDataTestImageFolderSampler, TestImageFolderSharding2) { 429 int64_t num_samples = 12; 430 std::shared_ptr<SamplerRT> sampler = std::make_shared<DistributedSamplerRT>(4, 3, false, num_samples); 431 std::string folder_path = datasets_root_path_ + "/testPK/data"; 432 // numWrks, rows, conns, path, shuffle, sampler, map, numSamples, decode 433 auto tree = Build({ImageFolder(16, 16, 32, folder_path, false, std::move(sampler), {})}); 434 tree->Prepare(); 435 Status rc = tree->Launch(); 436 uint32_t labels[11] = {0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3}; 437 if (rc.IsError()) { 438 MS_LOG(ERROR) << "Return code error detected during tree launch: " << common::SafeCStr(rc.ToString()) << "."; 439 EXPECT_TRUE(false); 440 } else { 441 DatasetIterator di(tree); 442 TensorMap tensor_map; 443 rc = di.GetNextAsMap(&tensor_map); 444 EXPECT_TRUE(rc.IsOk()); 445 uint64_t i = 0; 446 int32_t label = 0; 447 while (tensor_map.size() != 0) { 448 tensor_map["label"]->GetItemAt<int32_t>(&label, {}); 449 EXPECT_EQ(labels[i], label); 450 MS_LOG(DEBUG) << "row:" << i << "\tlabel:" << label << "\n"; 451 i++; 452 ASSERT_OK(di.GetNextAsMap(&tensor_map)); 453 } 454 EXPECT_TRUE(i == 11); 455 } 456 } 457