1 /**
2 * Copyright 2020-2021 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include "common/common.h"
17 #include "minddata/dataset/include/dataset/datasets.h"
18 #include "minddata/dataset/include/dataset/vision.h"
19
20 using namespace mindspore::dataset;
21
// Helper function to get the session id from SESSION_ID env variable.
// Only the declaration lives here; the definition is not part of this file.
// The id is written through `session_id`, and the tests below require the returned Status to be OK
// before they use the id to create a DatasetCache.
Status GetSessionFromEnv(session_id_type *session_id);
24
25 class MindDataTestCacheOp : public UT::DatasetOpTesting {
26 public:
SetUp()27 void SetUp() override { DatasetOpTesting::SetUp(); }
28 };
29
// Builds a cached ImageFolder dataset with a null sampler argument and expects
// iterator creation to fail (CreateIterator() returns nullptr).
TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCApiSamplerNull) {
  session_id_type env_session;
  // env_session has no initializer, so do not continue if the session lookup failed.
  ASSERT_OK(GetSessionFromEnv(&env_session));

  std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false, "127.0.0.1", 50053, 1, 1);
  ASSERT_NE(some_cache, nullptr);

  // Create an ImageFolder Dataset, this folder_path only has 2 images in it
  std::string folder_path = datasets_root_path_ + "/testImageNetData/train/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, nullptr, {}, {}, some_cache);
  // Fatal check: ds is dereferenced right below.
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  // Now the parameter check for ImageFolderNode would fail and we would end up with a nullptr iter.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_EQ(iter, nullptr);
}
49
// Attaches the same cache to both the ImageFolder leaf and the Map on top of it (a nested cache)
// and expects iterator creation to fail (CreateIterator() returns nullptr).
TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCApiNestedCache) {
  session_id_type env_session;
  // env_session has no initializer, so do not continue if the session lookup failed.
  ASSERT_OK(GetSessionFromEnv(&env_session));

  std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  ASSERT_NE(some_cache, nullptr);

  // Create an ImageFolder Dataset, this folder_path only has 2 images in it
  std::string folder_path = datasets_root_path_ + "/testImageNetData/train/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, std::make_shared<RandomSampler>(), {}, {}, some_cache);
  // Fatal check: ds is dereferenced by the Map call below.
  ASSERT_NE(ds, nullptr);

  // Create objects for the tensor ops
  std::shared_ptr<TensorTransform> decode_op = std::make_shared<vision::Decode>();
  ASSERT_NE(decode_op, nullptr);

  // Create a Map operation on ds
  ds = ds->Map({decode_op}, {}, {}, {"image"}, some_cache);
  // Fatal check: ds is dereferenced by CreateIterator() below.
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  // Now in the cache_error_pass would fail and we would end up with a nullptr iter.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_EQ(iter, nullptr);
}
77
// Runs a cached ImageFolder dataset through Repeat(2) and counts the rows.
// The folder holds 2 images, so 4 rows are expected in total.
TEST_F(MindDataTestCacheOp, DISABLED_TestCacheImageFolderCApi) {
  session_id_type env_session;
  // env_session has no initializer, so do not continue if the session lookup failed.
  ASSERT_OK(GetSessionFromEnv(&env_session));

  std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  ASSERT_NE(some_cache, nullptr);

  // Create an ImageFolder Dataset, this folder_path only has 2 images in it
  std::string folder_path = datasets_root_path_ + "/testImageNetData/train/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, std::make_shared<RandomSampler>(), {}, {}, some_cache);
  // Fatal check: ds is dereferenced below.
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: a null iterator must fail the test instead of crashing on GetNextRow().
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 4);

  // Manually terminate the pipeline
  iter->Stop();
}
118
// Runs a cached Coco ("Detection") dataset through Repeat(2) and counts the rows.
// The folder holds 6 images, so 12 rows are expected in total.
TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCocoCApi) {
  session_id_type env_session;
  // env_session has no initializer, so do not continue if the session lookup failed.
  ASSERT_OK(GetSessionFromEnv(&env_session));

  std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  ASSERT_NE(some_cache, nullptr);

  // Create a Coco Dataset, this folder_path has 6 images in it
  std::string folder_path = datasets_root_path_ + "/testCOCO/train/";
  std::string annotation_file_path = datasets_root_path_ + "/testCOCO/annotations/train.json";
  std::shared_ptr<Dataset> ds =
    Coco(folder_path, annotation_file_path, "Detection", false, std::make_shared<RandomSampler>(), some_cache);
  // Fatal check: ds is dereferenced below.
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: a null iterator must fail the test instead of crashing on GetNextRow().
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 12);

  // Manually terminate the pipeline
  iter->Stop();
}
161
// Runs a cached Mnist dataset (RandomSampler(false, 10)) through Repeat(2) and counts the rows.
// 20 rows are expected in total across the two repeats.
TEST_F(MindDataTestCacheOp, DISABLED_TestCacheMnistCApi) {
  session_id_type env_session;
  // env_session has no initializer, so do not continue if the session lookup failed.
  ASSERT_OK(GetSessionFromEnv(&env_session));

  std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  ASSERT_NE(some_cache, nullptr);

  // Create a Mnist Dataset
  std::string folder_path = datasets_root_path_ + "/testMnistData/";
  std::shared_ptr<Dataset> ds = Mnist(folder_path, "all", std::make_shared<RandomSampler>(false, 10), some_cache);
  // Fatal check: ds is dereferenced below.
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: a null iterator must fail the test instead of crashing on GetNextRow().
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 20);

  // Manually terminate the pipeline
  iter->Stop();
}
202
// Runs a cached CelebA dataset through Repeat(2) and counts the rows.
// The folder holds 4 records, so 8 rows are expected in total.
TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCelebaCApi) {
  session_id_type env_session;
  // env_session has no initializer, so do not continue if the session lookup failed.
  ASSERT_OK(GetSessionFromEnv(&env_session));

  std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  ASSERT_NE(some_cache, nullptr);

  // Create a CelebA Dataset, this folder_path has 4 records in it
  std::string folder_path = datasets_root_path_ + "/testCelebAData/";
  std::shared_ptr<Dataset> ds =
    CelebA(folder_path, "all", std::make_shared<RandomSampler>(false, 10), false, {}, some_cache);
  // Fatal check: ds is dereferenced below.
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: a null iterator must fail the test instead of crashing on GetNextRow().
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 8);

  // Manually terminate the pipeline
  iter->Stop();
}
244
// Runs a cached Manifest ("train") dataset through Repeat(2) and counts the rows.
// The manifest holds 2 records, so 4 rows are expected in total.
TEST_F(MindDataTestCacheOp, DISABLED_TestCacheManifestCApi) {
  session_id_type env_session;
  // env_session has no initializer, so do not continue if the session lookup failed.
  ASSERT_OK(GetSessionFromEnv(&env_session));

  std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  ASSERT_NE(some_cache, nullptr);

  // Create a Manifest Dataset, this file_path has 2 records in it
  std::string file_path = datasets_root_path_ + "/testManifestData/cpp.json";
  std::shared_ptr<Dataset> ds = Manifest(file_path, "train", std::make_shared<RandomSampler>(), {}, false, some_cache);
  // Fatal check: ds is dereferenced below.
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: a null iterator must fail the test instead of crashing on GetNextRow().
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 4);

  // Manually terminate the pipeline
  iter->Stop();
}
285
// Runs a cached Cifar10 dataset (RandomSampler(false, 10)) through Repeat(2) and counts the rows.
// 20 rows are expected in total across the two repeats.
TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCifar10CApi) {
  session_id_type env_session;
  // env_session has no initializer, so do not continue if the session lookup failed.
  ASSERT_OK(GetSessionFromEnv(&env_session));

  std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  ASSERT_NE(some_cache, nullptr);

  // Create a Cifar10 Dataset
  std::string folder_path = datasets_root_path_ + "/testCifar10Data/";
  std::shared_ptr<Dataset> ds = Cifar10(folder_path, "all", std::make_shared<RandomSampler>(false, 10), some_cache);
  // Fatal check: ds is dereferenced below.
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: a null iterator must fail the test instead of crashing on GetNextRow().
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 20);

  // Manually terminate the pipeline
  iter->Stop();
}
326
// Runs a cached Cifar100 dataset (RandomSampler(false, 10)) through Repeat(2) and counts the rows.
// 20 rows are expected in total across the two repeats.
TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCifar100CApi) {
  session_id_type env_session;
  // env_session has no initializer, so do not continue if the session lookup failed.
  ASSERT_OK(GetSessionFromEnv(&env_session));

  std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  ASSERT_NE(some_cache, nullptr);

  // Create a Cifar100 Dataset
  std::string folder_path = datasets_root_path_ + "/testCifar100Data/";
  std::shared_ptr<Dataset> ds = Cifar100(folder_path, "all", std::make_shared<RandomSampler>(false, 10), some_cache);
  // Fatal check: ds is dereferenced below.
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: a null iterator must fail the test instead of crashing on GetNextRow().
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 20);

  // Manually terminate the pipeline
  iter->Stop();
}
367
// Runs a cached VOC ("Detection", "train") dataset through Repeat(2) and counts the rows.
// The folder holds 9 records, so 18 rows are expected in total.
TEST_F(MindDataTestCacheOp, DISABLED_TestCacheVocCApi) {
  session_id_type env_session;
  // env_session has no initializer, so do not continue if the session lookup failed.
  ASSERT_OK(GetSessionFromEnv(&env_session));

  std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  ASSERT_NE(some_cache, nullptr);

  // Create a VOC Dataset, this folder_path has 9 records in it
  std::string folder_path = datasets_root_path_ + "/testVOC2012/";
  std::shared_ptr<Dataset> ds =
    VOC(folder_path, "Detection", "train", {}, false, std::make_shared<RandomSampler>(), some_cache);
  // Fatal check: ds is dereferenced below.
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: a null iterator must fail the test instead of crashing on GetNextRow().
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 18);

  // Manually terminate the pipeline
  iter->Stop();
}
409
// Runs a cached Album dataset (columns "image", "label", "id") through Repeat(2) and counts the rows.
// The dataset holds 7 records, so 14 rows are expected in total.
TEST_F(MindDataTestCacheOp, DISABLED_TestCacheAlbumCApi) {
  session_id_type env_session;
  // env_session has no initializer, so do not continue if the session lookup failed.
  ASSERT_OK(GetSessionFromEnv(&env_session));

  std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  ASSERT_NE(some_cache, nullptr);

  std::string folder_path = datasets_root_path_ + "/testAlbum/images";
  std::string schema_file = datasets_root_path_ + "/testAlbum/datasetSchema.json";
  std::vector<std::string> column_names = {"image", "label", "id"};
  // Create a Album Dataset, 7 records in it
  std::shared_ptr<Dataset> ds =
    Album(folder_path, schema_file, column_names, false, std::make_shared<RandomSampler>(), some_cache);
  // Fatal check: ds is dereferenced below.
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: a null iterator must fail the test instead of crashing on GetNextRow().
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 14);

  // Manually terminate the pipeline
  iter->Stop();
}
451
// Reads a cached MindData (MindRecord) dataset once, without Repeat, and counts the rows.
// The dataset holds 20 records, so 20 rows are expected.
TEST_F(MindDataTestCacheOp, DISABLED_TestCacheMindRecordCApi) {
  session_id_type env_session;
  // env_session has no initializer, so do not continue if the session lookup failed.
  ASSERT_OK(GetSessionFromEnv(&env_session));

  std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  ASSERT_NE(some_cache, nullptr);

  // Create a MindData Dataset
  // Pass one mindrecord shard file to parse dataset info, and search for other mindrecord files with same dataset info,
  // thus all records in imagenet.mindrecord0 ~ imagenet.mindrecord3 will be read
  std::string file_path = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";

  // Create a MindRecord Dataset, 20 records in it
  std::shared_ptr<Dataset> ds = MindData(file_path, {}, std::make_shared<RandomSampler>(), nullptr, 0,
                                         ShuffleMode::kGlobal, some_cache);
  // Fatal check: ds is dereferenced below.
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: a null iterator must fail the test instead of crashing on GetNextRow().
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 20);

  // Manually terminate the pipeline
  iter->Stop();
}
490
// Runs a cached RandomData dataset (8 rows requested, "image" and "label" uint8 columns)
// through Repeat(2) and counts the rows; 16 rows are expected in total.
TEST_F(MindDataTestCacheOp, DISABLED_TestCacheRandomDataCApi) {
  session_id_type env_session;
  // env_session has no initializer, so do not continue if the session lookup failed.
  ASSERT_OK(GetSessionFromEnv(&env_session));

  std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  ASSERT_NE(some_cache, nullptr);

  // Create a RandomDataset
  std::shared_ptr<SchemaObj> schema = Schema();
  // Fatal check: schema is dereferenced by the add_column calls below.
  ASSERT_NE(schema, nullptr);

  ASSERT_OK(schema->add_column("image", mindspore::DataType::kNumberTypeUInt8, {2}));
  ASSERT_OK(schema->add_column("label", mindspore::DataType::kNumberTypeUInt8, {1}));
  std::shared_ptr<Dataset> ds = RandomData(8, schema, {}, some_cache);
  // Fatal check: ds is dereferenced below.
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: a null iterator must fail the test instead of crashing on GetNextRow().
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 16);

  // Manually terminate the pipeline
  iter->Stop();
}
532
// Runs a cached TFRecord dataset (no sample limit, 1 shard) through Repeat(2) and counts the rows.
// The file holds 3 records, so 6 rows are expected in total.
TEST_F(MindDataTestCacheOp, DISABLED_TestCacheTFRecordCApi1) {
  session_id_type env_session;
  // env_session has no initializer, so do not continue if the session lookup failed.
  ASSERT_OK(GetSessionFromEnv(&env_session));

  std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  ASSERT_NE(some_cache, nullptr);

  // Create a TFRecord Dataset, this file_path has 3 records in it
  std::string file_path = datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0001.data";
  std::string schema_path = datasets_root_path_ + "/test_tf_file_3_images2/datasetSchema.json";
  std::shared_ptr<Dataset> ds =
    TFRecord({file_path}, schema_path, {"image"}, 0, ShuffleMode::kFalse, 1, 0, false, some_cache);
  // Fatal check: ds is dereferenced below.
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: a null iterator must fail the test instead of crashing on GetNextRow().
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 6);

  // Manually terminate the pipeline
  iter->Stop();
}
575
// Runs a cached TFRecord dataset configured with 3 shards (shard id 0) through Repeat(2).
// One of the 3 records is expected for this shard, so 2 rows are expected in total.
TEST_F(MindDataTestCacheOp, DISABLED_TestCacheTFRecordCApi2) {
  session_id_type env_session;
  // env_session has no initializer, so do not continue if the session lookup failed.
  ASSERT_OK(GetSessionFromEnv(&env_session));

  std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  ASSERT_NE(some_cache, nullptr);

  // Create a TFRecord Dataset, this file_path has 3 records in it
  std::string file_path = datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0001.data";
  std::string schema_path = datasets_root_path_ + "/test_tf_file_3_images2/datasetSchema.json";

  // In this one, the TFRecord dataset will be given sharding configuration, however since a cache is
  // used, the tree prepare should undo the sharding configuration and instead, a distributed
  // sampler will be chosen with the same shard config.
  // With only 3 records shard into 3, we expect only 1 record returned for this shard
  // However, the sharding will be done by the sampler, not by the TFRecord leaf node
  // In this case, it is a row-based sharding, not the file-based sharding that would happen if
  // there was not any cache.
  std::shared_ptr<Dataset> ds =
    TFRecord({file_path}, schema_path, {"image"}, 0, ShuffleMode::kFalse, 3, 0, false, some_cache);
  // Fatal check: ds is dereferenced below.
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: a null iterator must fail the test instead of crashing on GetNextRow().
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 2);

  // Manually terminate the pipeline
  iter->Stop();
}
626
// Runs a cached TFRecord dataset limited to 2 samples through Repeat(2) and counts the rows.
// 4 rows are expected in total.
TEST_F(MindDataTestCacheOp, DISABLED_TestCacheTFRecordCApi3) {
  session_id_type env_session;
  // env_session has no initializer, so do not continue if the session lookup failed.
  ASSERT_OK(GetSessionFromEnv(&env_session));

  std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  ASSERT_NE(some_cache, nullptr);

  // Create a TFRecord Dataset, this file_path has 3 records in it
  std::string file_path = datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0001.data";
  std::string schema_path = datasets_root_path_ + "/test_tf_file_3_images2/datasetSchema.json";

  // In this one, a num_samples argument is given.
  // In this case, a sequential sampler would be chosen with the same num_samples argument.
  // The samples will be selected by the sequential sampler, not by the TFRecord leaf node.
  std::shared_ptr<Dataset> ds =
    TFRecord({file_path}, schema_path, {"image"}, 2, ShuffleMode::kFalse, 1, 0, false, some_cache);
  // Fatal check: ds is dereferenced below.
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: a null iterator must fail the test instead of crashing on GetNextRow().
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 4);

  // Manually terminate the pipeline
  iter->Stop();
}
673
// Runs a cached TextFile dataset limited to 2 samples through Repeat(2) and counts the rows.
// 4 rows are expected in total.
TEST_F(MindDataTestCacheOp, DISABLED_TestCacheTextfileCApi) {
  session_id_type env_session;
  // env_session has no initializer, so do not continue if the session lookup failed.
  ASSERT_OK(GetSessionFromEnv(&env_session));

  std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  ASSERT_NE(some_cache, nullptr);

  // Create a TextFile Dataset, this file_path has 3 records in it
  std::string file_path = datasets_root_path_ + "/testTextFileDataset/1.txt";

  // In this one, a num_samples=2 argument is given.
  // In this case, a sequential sampler would be chosen with the same num_samples argument.
  // The samples will be selected by the sequential sampler, not by the TextFile leaf node.
  std::shared_ptr<Dataset> ds = TextFile({file_path}, 2, ShuffleMode::kGlobal, 1, 0, some_cache);
  // Fatal check: ds is dereferenced below.
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: a null iterator must fail the test instead of crashing on GetNextRow().
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 4);

  // Manually terminate the pipeline
  iter->Stop();
}
716
// Runs a cached CSV dataset limited to 2 samples through Repeat(2) and counts the rows.
// 4 rows are expected in total.
TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCsvCApi) {
  session_id_type env_session;
  // env_session has no initializer, so do not continue if the session lookup failed.
  ASSERT_OK(GetSessionFromEnv(&env_session));

  std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  ASSERT_NE(some_cache, nullptr);

  // Create a CSV Dataset, this file_path has 3 records in it
  std::string file_path = datasets_root_path_ + "/testCSV/1.csv";
  std::vector<std::string> column_names = {"col1", "col2", "col3", "col4"};

  // In this one, a num_samples=2 argument is given.
  // In this case, a sequential sampler would be chosen with the same num_samples argument.
  // The samples will be selected by the sequential sampler, not by the CSV leaf node.
  std::shared_ptr<Dataset> ds = CSV({file_path}, ',', {}, column_names, 2, ShuffleMode::kFalse, 1, 0, some_cache);
  // Fatal check: ds is dereferenced below.
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: a null iterator must fail the test instead of crashing on GetNextRow().
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 4);

  // Manually terminate the pipeline
  iter->Stop();
}
760
// Runs a cached CLUE dataset (task "AFQMC", usage "train") limited to 2 samples through Repeat(2)
// and counts the rows; 4 rows are expected in total.
TEST_F(MindDataTestCacheOp, DISABLED_TestCacheClueCApi) {
  session_id_type env_session;
  // env_session has no initializer, so do not continue if the session lookup failed.
  ASSERT_OK(GetSessionFromEnv(&env_session));

  std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  ASSERT_NE(some_cache, nullptr);

  // Create a CLUE Dataset, this file_path has 3 records in it
  std::string file_path = datasets_root_path_ + "/testCLUE/afqmc/train.json";
  std::string task = "AFQMC";
  std::string usage = "train";

  // In this one, a num_samples=2 argument is given.
  // In this case, a sequential sampler would be chosen with the same num_samples argument.
  // The samples will be selected by the sequential sampler, not by the CLUE leaf node.
  std::shared_ptr<Dataset> ds = CLUE({file_path}, task, usage, 2, ShuffleMode::kFalse, 1, 0, some_cache);
  // Fatal check: ds is dereferenced below.
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: a null iterator must fail the test instead of crashing on GetNextRow().
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 4);

  // Manually terminate the pipeline
  iter->Stop();
}
805
// Builds two identical ImageFolder pipelines on the same cache and runs them one after the other.
// Each pipeline is expected to yield the 2 images in the folder.
TEST_F(MindDataTestCacheOp, DISABLED_TestCApiCacheShare1) {
  session_id_type env_session;
  // env_session has no initializer, so do not continue if the session lookup failed.
  ASSERT_OK(GetSessionFromEnv(&env_session));

  std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  ASSERT_NE(some_cache, nullptr);

  // Create an ImageFolder Dataset, this folder_path only has 2 images in it
  std::string folder_path = datasets_root_path_ + "/testImageNetData/train/";
  std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, false, std::make_shared<RandomSampler>(), {}, {}, some_cache);
  // Fatal checks: both datasets are dereferenced below.
  ASSERT_NE(ds1, nullptr);
  std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, false, std::make_shared<RandomSampler>(), {}, {}, some_cache);
  ASSERT_NE(ds2, nullptr);

  // Create and launch the Execution Tree for ds1
  std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
  // Fatal check: a null iterator must fail the test instead of crashing on GetNextRow().
  ASSERT_NE(iter1, nullptr);
  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter1->GetNextRow(&row));

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter1->GetNextRow(&row));
  }
  EXPECT_EQ(i, 2);
  // Manually terminate the pipeline
  iter1->Stop();

  // Create and launch the Execution Tree for ds2
  std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
  ASSERT_NE(iter2, nullptr);
  // Iterate the dataset and get each row
  ASSERT_OK(iter2->GetNextRow(&row));

  i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter2->GetNextRow(&row));
  }
  EXPECT_EQ(i, 2);

  // Manually terminate the pipeline
  iter2->Stop();
}
857
// Builds two ImageFolder pipelines on the same cache that differ only in their sampler
// (RandomSampler vs SequentialSampler) and runs them one after the other.
// Each pipeline is expected to yield the 2 images in the folder.
TEST_F(MindDataTestCacheOp, DISABLED_TestCApiCacheShare2) {
  session_id_type env_session;
  // env_session has no initializer, so do not continue if the session lookup failed.
  ASSERT_OK(GetSessionFromEnv(&env_session));

  std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  ASSERT_NE(some_cache, nullptr);

  // Create an ImageFolder Dataset, this folder_path only has 2 images in it
  std::string folder_path = datasets_root_path_ + "/testImageNetData/train/";
  // The first pipeline is ImageFolder with RandomSampler, the second pipeline is ImageFolder with SequentialSampler
  // Since sampler does not influence the data in the source, these two pipelines can share a common cache.
  std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(), {}, {}, some_cache);
  // Fatal checks: both datasets are dereferenced below.
  ASSERT_NE(ds1, nullptr);
  std::shared_ptr<Dataset> ds2 =
    ImageFolder(folder_path, true, std::make_shared<SequentialSampler>(), {}, {}, some_cache);
  ASSERT_NE(ds2, nullptr);

  // Create and launch the Execution Tree for ds1
  std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
  // Fatal check: a null iterator must fail the test instead of crashing on GetNextRow().
  ASSERT_NE(iter1, nullptr);
  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter1->GetNextRow(&row));

  // Only the number of rows matters here; the row contents are not inspected.
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    ASSERT_OK(iter1->GetNextRow(&row));
  }
  EXPECT_EQ(i, 2);
  // Manually terminate the pipeline
  iter1->Stop();

  // Create and launch the Execution Tree for ds2
  std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
  ASSERT_NE(iter2, nullptr);
  // Iterate the dataset and get each row
  ASSERT_OK(iter2->GetNextRow(&row));

  i = 0;
  while (row.size() != 0) {
    i++;
    ASSERT_OK(iter2->GetNextRow(&row));
  }
  EXPECT_EQ(i, 2);

  // Manually terminate the pipeline
  iter2->Stop();
}
910
// Builds two ImageFolder pipelines on the same cache that differ in the second ImageFolder
// argument (true vs false). The first pipeline is expected to run and yield 2 rows; creating
// the iterator for the second one is expected to fail (CreateIterator() returns nullptr).
TEST_F(MindDataTestCacheOp, DISABLED_TestCApiCacheShareFailure1) {
  session_id_type env_session;
  // env_session has no initializer, so do not continue if the session lookup failed.
  ASSERT_OK(GetSessionFromEnv(&env_session));

  std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
  ASSERT_NE(some_cache, nullptr);

  // Create an ImageFolder Dataset, this folder_path only has 2 images in it
  std::string folder_path = datasets_root_path_ + "/testImageNetData/train/";
  std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(), {}, {}, some_cache);
  // Fatal checks: both datasets are dereferenced below.
  ASSERT_NE(ds1, nullptr);
  std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, false, std::make_shared<RandomSampler>(), {}, {}, some_cache);
  ASSERT_NE(ds2, nullptr);

  // Create and launch the Execution Tree for ds1
  std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
  // Fatal check: a null iterator must fail the test instead of crashing on GetNextRow().
  ASSERT_NE(iter1, nullptr);
  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter1->GetNextRow(&row));

  // Only the number of rows matters here; the row contents are not inspected.
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    ASSERT_OK(iter1->GetNextRow(&row));
  }
  EXPECT_EQ(i, 2);
  // Manually terminate the pipeline
  iter1->Stop();

  // Re-use a cache for the second pipeline would fail
  std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
  EXPECT_EQ(iter2, nullptr);
}
947