• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020-2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "common/common.h"
17 #include "minddata/dataset/include/dataset/datasets.h"
18 #include "minddata/dataset/include/dataset/vision.h"
19 
20 using namespace mindspore::dataset;
21 
22 // Helper function to get the session id from SESSION_ID env variable
23 Status GetSessionFromEnv(session_id_type *session_id);
24 
25 class MindDataTestCacheOp : public UT::DatasetOpTesting {
26  public:
27   void SetUp() override { DatasetOpTesting::SetUp(); }
28 };
29 
30 TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCApiSamplerNull) {
31   session_id_type env_session;
32   Status s = GetSessionFromEnv(&env_session);
33   EXPECT_EQ(s, Status::OK());
34 
35   std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false, "127.0.0.1", 50053, 1, 1);
36   EXPECT_NE(some_cache, nullptr);
37 
38   // Create an ImageFolder Dataset, this folder_path only has 2 images in it
39   std::string folder_path = datasets_root_path_ + "/testImageNetData/train/";
40   std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, nullptr, {}, {}, some_cache);
41   EXPECT_NE(ds, nullptr);
42 
43   // Create an iterator over the result of the above dataset
44   // This will trigger the creation of the Execution Tree and launch it.
45   // Now the parameter check for ImageFolderNode would fail and we would end up with a nullptr iter.
46   std::shared_ptr<Iterator> iter = ds->CreateIterator();
47   EXPECT_EQ(iter, nullptr);
48 }
49 
50 TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCApiNestedCache) {
51   session_id_type env_session;
52   Status s = GetSessionFromEnv(&env_session);
53   EXPECT_EQ(s, Status::OK());
54 
55   std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
56   EXPECT_NE(some_cache, nullptr);
57 
58   // Create an ImageFolder Dataset, this folder_path only has 2 images in it
59   std::string folder_path = datasets_root_path_ + "/testImageNetData/train/";
60   std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, std::make_shared<RandomSampler>(), {}, {}, some_cache);
61   EXPECT_NE(ds, nullptr);
62 
63   // Create objects for the tensor ops
64   std::shared_ptr<TensorTransform> decode_op = std::make_shared<vision::Decode>();
65   EXPECT_NE(decode_op, nullptr);
66 
67   // Create a Map operation on ds
68   ds = ds->Map({decode_op}, {}, {}, {"image"}, some_cache);
69   EXPECT_NE(ds, nullptr);
70 
71   // Create an iterator over the result of the above dataset
72   // This will trigger the creation of the Execution Tree and launch it.
73   // Now in the cache_error_pass would fail and we would end up with a nullptr iter.
74   std::shared_ptr<Iterator> iter = ds->CreateIterator();
75   EXPECT_EQ(iter, nullptr);
76 }
77 
78 TEST_F(MindDataTestCacheOp, DISABLED_TestCacheImageFolderCApi) {
79   session_id_type env_session;
80   Status s = GetSessionFromEnv(&env_session);
81   EXPECT_EQ(s, Status::OK());
82 
83   std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
84   EXPECT_NE(some_cache, nullptr);
85 
86   // Create an ImageFolder Dataset, this folder_path only has 2 images in it
87   std::string folder_path = datasets_root_path_ + "/testImageNetData/train/";
88   std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, std::make_shared<RandomSampler>(), {}, {}, some_cache);
89   EXPECT_NE(ds, nullptr);
90 
91   // Create a Repeat operation on ds
92   int32_t repeat_num = 2;
93   ds = ds->Repeat(repeat_num);
94   EXPECT_NE(ds, nullptr);
95 
96   // Create an iterator over the result of the above dataset
97   // This will trigger the creation of the Execution Tree and launch it.
98   std::shared_ptr<Iterator> iter = ds->CreateIterator();
99   EXPECT_NE(iter, nullptr);
100 
101   // Iterate the dataset and get each row
102   std::unordered_map<std::string, mindspore::MSTensor> row;
103   ASSERT_OK(iter->GetNextRow(&row));
104 
105   uint64_t i = 0;
106   while (row.size() != 0) {
107     i++;
108     auto image = row["image"];
109     MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
110     ASSERT_OK(iter->GetNextRow(&row));
111   }
112 
113   EXPECT_EQ(i, 4);
114 
115   // Manually terminate the pipeline
116   iter->Stop();
117 }
118 
119 TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCocoCApi) {
120   session_id_type env_session;
121   Status s = GetSessionFromEnv(&env_session);
122   EXPECT_EQ(s, Status::OK());
123 
124   std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
125   EXPECT_NE(some_cache, nullptr);
126 
127   // Create a Coco Dataset, this folder_path has 6 images in it
128   std::string folder_path = datasets_root_path_ + "/testCOCO/train/";
129   std::string annotation_file_path = datasets_root_path_ + "/testCOCO/annotations/train.json";
130   std::shared_ptr<Dataset> ds =
131     Coco(folder_path, annotation_file_path, "Detection", false, std::make_shared<RandomSampler>(), some_cache);
132   EXPECT_NE(ds, nullptr);
133 
134   // Create a Repeat operation on ds
135   int32_t repeat_num = 2;
136   ds = ds->Repeat(repeat_num);
137   EXPECT_NE(ds, nullptr);
138 
139   // Create an iterator over the result of the above dataset
140   // This will trigger the creation of the Execution Tree and launch it.
141   std::shared_ptr<Iterator> iter = ds->CreateIterator();
142   EXPECT_NE(iter, nullptr);
143 
144   // Iterate the dataset and get each row
145   std::unordered_map<std::string, mindspore::MSTensor> row;
146   ASSERT_OK(iter->GetNextRow(&row));
147 
148   uint64_t i = 0;
149   while (row.size() != 0) {
150     i++;
151     auto image = row["image"];
152     MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
153     ASSERT_OK(iter->GetNextRow(&row));
154   }
155 
156   EXPECT_EQ(i, 12);
157 
158   // Manually terminate the pipeline
159   iter->Stop();
160 }
161 
162 TEST_F(MindDataTestCacheOp, DISABLED_TestCacheMnistCApi) {
163   session_id_type env_session;
164   Status s = GetSessionFromEnv(&env_session);
165   EXPECT_EQ(s, Status::OK());
166 
167   std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
168   EXPECT_NE(some_cache, nullptr);
169 
170   // Create a Mnist Dataset
171   std::string folder_path = datasets_root_path_ + "/testMnistData/";
172   std::shared_ptr<Dataset> ds = Mnist(folder_path, "all", std::make_shared<RandomSampler>(false, 10), some_cache);
173   EXPECT_NE(ds, nullptr);
174 
175   // Create a Repeat operation on ds
176   int32_t repeat_num = 2;
177   ds = ds->Repeat(repeat_num);
178   EXPECT_NE(ds, nullptr);
179 
180   // Create an iterator over the result of the above dataset
181   // This will trigger the creation of the Execution Tree and launch it.
182   std::shared_ptr<Iterator> iter = ds->CreateIterator();
183   EXPECT_NE(iter, nullptr);
184 
185   // Iterate the dataset and get each row
186   std::unordered_map<std::string, mindspore::MSTensor> row;
187   ASSERT_OK(iter->GetNextRow(&row));
188 
189   uint64_t i = 0;
190   while (row.size() != 0) {
191     i++;
192     auto image = row["image"];
193     MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
194     ASSERT_OK(iter->GetNextRow(&row));
195   }
196 
197   EXPECT_EQ(i, 20);
198 
199   // Manually terminate the pipeline
200   iter->Stop();
201 }
202 
203 TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCelebaCApi) {
204   session_id_type env_session;
205   Status s = GetSessionFromEnv(&env_session);
206   EXPECT_EQ(s, Status::OK());
207 
208   std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
209   EXPECT_NE(some_cache, nullptr);
210 
211   // Create a CelebA Dataset, this folder_path has 4 records in it
212   std::string folder_path = datasets_root_path_ + "/testCelebAData/";
213   std::shared_ptr<Dataset> ds =
214     CelebA(folder_path, "all", std::make_shared<RandomSampler>(false, 10), false, {}, some_cache);
215   EXPECT_NE(ds, nullptr);
216 
217   // Create a Repeat operation on ds
218   int32_t repeat_num = 2;
219   ds = ds->Repeat(repeat_num);
220   EXPECT_NE(ds, nullptr);
221 
222   // Create an iterator over the result of the above dataset
223   // This will trigger the creation of the Execution Tree and launch it.
224   std::shared_ptr<Iterator> iter = ds->CreateIterator();
225   EXPECT_NE(iter, nullptr);
226 
227   // Iterate the dataset and get each row
228   std::unordered_map<std::string, mindspore::MSTensor> row;
229   ASSERT_OK(iter->GetNextRow(&row));
230 
231   uint64_t i = 0;
232   while (row.size() != 0) {
233     i++;
234     auto image = row["image"];
235     MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
236     ASSERT_OK(iter->GetNextRow(&row));
237   }
238 
239   EXPECT_EQ(i, 8);
240 
241   // Manually terminate the pipeline
242   iter->Stop();
243 }
244 
245 TEST_F(MindDataTestCacheOp, DISABLED_TestCacheManifestCApi) {
246   session_id_type env_session;
247   Status s = GetSessionFromEnv(&env_session);
248   EXPECT_EQ(s, Status::OK());
249 
250   std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
251   EXPECT_NE(some_cache, nullptr);
252 
253   // Create a Manifest Dataset, this file_path has 2 records in it
254   std::string file_path = datasets_root_path_ + "/testManifestData/cpp.json";
255   std::shared_ptr<Dataset> ds = Manifest(file_path, "train", std::make_shared<RandomSampler>(), {}, false, some_cache);
256   EXPECT_NE(ds, nullptr);
257 
258   // Create a Repeat operation on ds
259   int32_t repeat_num = 2;
260   ds = ds->Repeat(repeat_num);
261   EXPECT_NE(ds, nullptr);
262 
263   // Create an iterator over the result of the above dataset
264   // This will trigger the creation of the Execution Tree and launch it.
265   std::shared_ptr<Iterator> iter = ds->CreateIterator();
266   EXPECT_NE(iter, nullptr);
267 
268   // Iterate the dataset and get each row
269   std::unordered_map<std::string, mindspore::MSTensor> row;
270   ASSERT_OK(iter->GetNextRow(&row));
271 
272   uint64_t i = 0;
273   while (row.size() != 0) {
274     i++;
275     auto image = row["image"];
276     MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
277     ASSERT_OK(iter->GetNextRow(&row));
278   }
279 
280   EXPECT_EQ(i, 4);
281 
282   // Manually terminate the pipeline
283   iter->Stop();
284 }
285 
286 TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCifar10CApi) {
287   session_id_type env_session;
288   Status s = GetSessionFromEnv(&env_session);
289   EXPECT_EQ(s, Status::OK());
290 
291   std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
292   EXPECT_NE(some_cache, nullptr);
293 
294   // Create a Cifar10 Dataset
295   std::string folder_path = datasets_root_path_ + "/testCifar10Data/";
296   std::shared_ptr<Dataset> ds = Cifar10(folder_path, "all", std::make_shared<RandomSampler>(false, 10), some_cache);
297   EXPECT_NE(ds, nullptr);
298 
299   // Create a Repeat operation on ds
300   int32_t repeat_num = 2;
301   ds = ds->Repeat(repeat_num);
302   EXPECT_NE(ds, nullptr);
303 
304   // Create an iterator over the result of the above dataset
305   // This will trigger the creation of the Execution Tree and launch it.
306   std::shared_ptr<Iterator> iter = ds->CreateIterator();
307   EXPECT_NE(iter, nullptr);
308 
309   // Iterate the dataset and get each row
310   std::unordered_map<std::string, mindspore::MSTensor> row;
311   ASSERT_OK(iter->GetNextRow(&row));
312 
313   uint64_t i = 0;
314   while (row.size() != 0) {
315     i++;
316     auto image = row["image"];
317     MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
318     ASSERT_OK(iter->GetNextRow(&row));
319   }
320 
321   EXPECT_EQ(i, 20);
322 
323   // Manually terminate the pipeline
324   iter->Stop();
325 }
326 
327 TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCifar100CApi) {
328   session_id_type env_session;
329   Status s = GetSessionFromEnv(&env_session);
330   EXPECT_EQ(s, Status::OK());
331 
332   std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
333   EXPECT_NE(some_cache, nullptr);
334 
335   // Create a Cifar100 Dataset
336   std::string folder_path = datasets_root_path_ + "/testCifar100Data/";
337   std::shared_ptr<Dataset> ds = Cifar100(folder_path, "all", std::make_shared<RandomSampler>(false, 10), some_cache);
338   EXPECT_NE(ds, nullptr);
339 
340   // Create a Repeat operation on ds
341   int32_t repeat_num = 2;
342   ds = ds->Repeat(repeat_num);
343   EXPECT_NE(ds, nullptr);
344 
345   // Create an iterator over the result of the above dataset
346   // This will trigger the creation of the Execution Tree and launch it.
347   std::shared_ptr<Iterator> iter = ds->CreateIterator();
348   EXPECT_NE(iter, nullptr);
349 
350   // Iterate the dataset and get each row
351   std::unordered_map<std::string, mindspore::MSTensor> row;
352   ASSERT_OK(iter->GetNextRow(&row));
353 
354   uint64_t i = 0;
355   while (row.size() != 0) {
356     i++;
357     auto image = row["image"];
358     MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
359     ASSERT_OK(iter->GetNextRow(&row));
360   }
361 
362   EXPECT_EQ(i, 20);
363 
364   // Manually terminate the pipeline
365   iter->Stop();
366 }
367 
368 TEST_F(MindDataTestCacheOp, DISABLED_TestCacheVocCApi) {
369   session_id_type env_session;
370   Status s = GetSessionFromEnv(&env_session);
371   EXPECT_EQ(s, Status::OK());
372 
373   std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
374   EXPECT_NE(some_cache, nullptr);
375 
376   // Create a VOC Dataset, this folder_path has 9 records in it
377   std::string folder_path = datasets_root_path_ + "/testVOC2012/";
378   std::shared_ptr<Dataset> ds =
379     VOC(folder_path, "Detection", "train", {}, false, std::make_shared<RandomSampler>(), some_cache);
380   EXPECT_NE(ds, nullptr);
381 
382   // Create a Repeat operation on ds
383   int32_t repeat_num = 2;
384   ds = ds->Repeat(repeat_num);
385   EXPECT_NE(ds, nullptr);
386 
387   // Create an iterator over the result of the above dataset
388   // This will trigger the creation of the Execution Tree and launch it.
389   std::shared_ptr<Iterator> iter = ds->CreateIterator();
390   EXPECT_NE(iter, nullptr);
391 
392   // Iterate the dataset and get each row
393   std::unordered_map<std::string, mindspore::MSTensor> row;
394   ASSERT_OK(iter->GetNextRow(&row));
395 
396   uint64_t i = 0;
397   while (row.size() != 0) {
398     i++;
399     auto image = row["image"];
400     MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
401     ASSERT_OK(iter->GetNextRow(&row));
402   }
403 
404   EXPECT_EQ(i, 18);
405 
406   // Manually terminate the pipeline
407   iter->Stop();
408 }
409 
410 TEST_F(MindDataTestCacheOp, DISABLED_TestCacheAlbumCApi) {
411   session_id_type env_session;
412   Status s = GetSessionFromEnv(&env_session);
413   EXPECT_EQ(s, Status::OK());
414 
415   std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
416   EXPECT_NE(some_cache, nullptr);
417 
418   std::string folder_path = datasets_root_path_ + "/testAlbum/images";
419   std::string schema_file = datasets_root_path_ + "/testAlbum/datasetSchema.json";
420   std::vector<std::string> column_names = {"image", "label", "id"};
421   // Create a Album Dataset, 7 records in it
422   std::shared_ptr<Dataset> ds =
423     Album(folder_path, schema_file, column_names, false, std::make_shared<RandomSampler>(), some_cache);
424   EXPECT_NE(ds, nullptr);
425 
426   // Create a Repeat operation on ds
427   int32_t repeat_num = 2;
428   ds = ds->Repeat(repeat_num);
429   EXPECT_NE(ds, nullptr);
430 
431   // Create an iterator over the result of the above dataset
432   // This will trigger the creation of the Execution Tree and launch it.
433   std::shared_ptr<Iterator> iter = ds->CreateIterator();
434   EXPECT_NE(iter, nullptr);
435 
436   // Iterate the dataset and get each row
437   std::unordered_map<std::string, mindspore::MSTensor> row;
438   ASSERT_OK(iter->GetNextRow(&row));
439 
440   uint64_t i = 0;
441   while (row.size() != 0) {
442     i++;
443     ASSERT_OK(iter->GetNextRow(&row));
444   }
445 
446   EXPECT_EQ(i, 14);
447 
448   // Manually terminate the pipeline
449   iter->Stop();
450 }
451 
452 TEST_F(MindDataTestCacheOp, DISABLED_TestCacheMindRecordCApi) {
453   session_id_type env_session;
454   Status s = GetSessionFromEnv(&env_session);
455   EXPECT_EQ(s, Status::OK());
456 
457   std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
458   EXPECT_NE(some_cache, nullptr);
459 
460   // Create a MindData Dataset
461   // Pass one mindrecord shard file to parse dataset info, and search for other mindrecord files with same dataset info,
462   // thus all records in imagenet.mindrecord0 ~ imagenet.mindrecord3 will be read
463   std::string file_path = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
464 
465   // Create a MindRecord Dataset, 20 records in it
466   std::shared_ptr<Dataset> ds = MindData(file_path, {}, std::make_shared<RandomSampler>(), nullptr, 0,
467                                          ShuffleMode::kGlobal, some_cache);
468   EXPECT_NE(ds, nullptr);
469 
470   // Create an iterator over the result of the above dataset
471   // This will trigger the creation of the Execution Tree and launch it.
472   std::shared_ptr<Iterator> iter = ds->CreateIterator();
473   EXPECT_NE(iter, nullptr);
474 
475   // Iterate the dataset and get each row
476   std::unordered_map<std::string, mindspore::MSTensor> row;
477   ASSERT_OK(iter->GetNextRow(&row));
478 
479   uint64_t i = 0;
480   while (row.size() != 0) {
481     i++;
482     ASSERT_OK(iter->GetNextRow(&row));
483   }
484 
485   EXPECT_EQ(i, 20);
486 
487   // Manually terminate the pipeline
488   iter->Stop();
489 }
490 
491 TEST_F(MindDataTestCacheOp, DISABLED_TestCacheRandomDataCApi) {
492   session_id_type env_session;
493   Status s = GetSessionFromEnv(&env_session);
494   EXPECT_EQ(s, Status::OK());
495 
496   std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
497   EXPECT_NE(some_cache, nullptr);
498 
499   // Create a RandomDataset
500   std::shared_ptr<SchemaObj> schema = Schema();
501 
502   ASSERT_OK(schema->add_column("image", mindspore::DataType::kNumberTypeUInt8, {2}));
503   ASSERT_OK(schema->add_column("label", mindspore::DataType::kNumberTypeUInt8, {1}));
504   std::shared_ptr<Dataset> ds = RandomData(8, schema, {}, some_cache);
505   EXPECT_NE(ds, nullptr);
506 
507   // Create a Repeat operation on ds
508   int32_t repeat_num = 2;
509   ds = ds->Repeat(repeat_num);
510   EXPECT_NE(ds, nullptr);
511 
512   // Create an iterator over the result of the above dataset
513   // This will trigger the creation of the Execution Tree and launch it.
514   std::shared_ptr<Iterator> iter = ds->CreateIterator();
515   EXPECT_NE(iter, nullptr);
516 
517   // Iterate the dataset and get each row
518   std::unordered_map<std::string, mindspore::MSTensor> row;
519   ASSERT_OK(iter->GetNextRow(&row));
520 
521   uint64_t i = 0;
522   while (row.size() != 0) {
523     i++;
524     ASSERT_OK(iter->GetNextRow(&row));
525   }
526 
527   EXPECT_EQ(i, 16);
528 
529   // Manually terminate the pipeline
530   iter->Stop();
531 }
532 
533 TEST_F(MindDataTestCacheOp, DISABLED_TestCacheTFRecordCApi1) {
534   session_id_type env_session;
535   Status s = GetSessionFromEnv(&env_session);
536   EXPECT_EQ(s, Status::OK());
537 
538   std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
539   EXPECT_NE(some_cache, nullptr);
540 
541   // Create a TFRecord Dataset, this file_path has 3 records in it
542   std::string file_path = datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0001.data";
543   std::string schema_path = datasets_root_path_ + "/test_tf_file_3_images2/datasetSchema.json";
544   std::shared_ptr<Dataset> ds =
545     TFRecord({file_path}, schema_path, {"image"}, 0, ShuffleMode::kFalse, 1, 0, false, some_cache);
546   EXPECT_NE(ds, nullptr);
547 
548   // Create a Repeat operation on ds
549   int32_t repeat_num = 2;
550   ds = ds->Repeat(repeat_num);
551   EXPECT_NE(ds, nullptr);
552 
553   // Create an iterator over the result of the above dataset
554   // This will trigger the creation of the Execution Tree and launch it.
555   std::shared_ptr<Iterator> iter = ds->CreateIterator();
556   EXPECT_NE(iter, nullptr);
557 
558   // Iterate the dataset and get each row
559   std::unordered_map<std::string, mindspore::MSTensor> row;
560   ASSERT_OK(iter->GetNextRow(&row));
561 
562   uint64_t i = 0;
563   while (row.size() != 0) {
564     i++;
565     auto image = row["image"];
566     MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
567     ASSERT_OK(iter->GetNextRow(&row));
568   }
569 
570   EXPECT_EQ(i, 6);
571 
572   // Manually terminate the pipeline
573   iter->Stop();
574 }
575 
576 TEST_F(MindDataTestCacheOp, DISABLED_TestCacheTFRecordCApi2) {
577   session_id_type env_session;
578   Status s = GetSessionFromEnv(&env_session);
579   EXPECT_EQ(s, Status::OK());
580 
581   std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
582   EXPECT_NE(some_cache, nullptr);
583 
584   // Create a TFRecord Dataset, this file_path has 3 records in it
585   std::string file_path = datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0001.data";
586   std::string schema_path = datasets_root_path_ + "/test_tf_file_3_images2/datasetSchema.json";
587 
588   // In this one, the TFRecord dataset will be given sharding configuration, however since a cache is
589   // used, the tree prepare should undo the sharding configuration and instead, a distributed
590   // sampler will be chosen with the same shard config.
591   // With only 3 records shard into 3, we expect only 1 record returned for this shard
592   // However, the sharding will be done by the sampler, not by the TFRecord leaf node
593   // In this case, it is a row-based sharding, not the file-based sharding that would happen if
594   // there was not any cache.
595   std::shared_ptr<Dataset> ds =
596     TFRecord({file_path}, schema_path, {"image"}, 0, ShuffleMode::kFalse, 3, 0, false, some_cache);
597   EXPECT_NE(ds, nullptr);
598 
599   // Create a Repeat operation on ds
600   int32_t repeat_num = 2;
601   ds = ds->Repeat(repeat_num);
602   EXPECT_NE(ds, nullptr);
603 
604   // Create an iterator over the result of the above dataset
605   // This will trigger the creation of the Execution Tree and launch it.
606   std::shared_ptr<Iterator> iter = ds->CreateIterator();
607   EXPECT_NE(iter, nullptr);
608 
609   // Iterate the dataset and get each row
610   std::unordered_map<std::string, mindspore::MSTensor> row;
611   ASSERT_OK(iter->GetNextRow(&row));
612 
613   uint64_t i = 0;
614   while (row.size() != 0) {
615     i++;
616     auto image = row["image"];
617     MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
618     ASSERT_OK(iter->GetNextRow(&row));
619   }
620 
621   EXPECT_EQ(i, 2);
622 
623   // Manually terminate the pipeline
624   iter->Stop();
625 }
626 
627 TEST_F(MindDataTestCacheOp, DISABLED_TestCacheTFRecordCApi3) {
628   session_id_type env_session;
629   Status s = GetSessionFromEnv(&env_session);
630   EXPECT_EQ(s, Status::OK());
631 
632   std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
633   EXPECT_NE(some_cache, nullptr);
634 
635   // Create a TFRecord Dataset, this file_path has 3 records in it
636   std::string file_path = datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0001.data";
637   std::string schema_path = datasets_root_path_ + "/test_tf_file_3_images2/datasetSchema.json";
638 
639   // In this one, a num_samples argument is given.
640   // In this case, a sequential sampler would be chosen with the same num_samples argument.
641   // The samples will be selected by the sequential sampler, not by the TFRecord leaf node.
642   std::shared_ptr<Dataset> ds =
643     TFRecord({file_path}, schema_path, {"image"}, 2, ShuffleMode::kFalse, 1, 0, false, some_cache);
644   EXPECT_NE(ds, nullptr);
645 
646   // Create a Repeat operation on ds
647   int32_t repeat_num = 2;
648   ds = ds->Repeat(repeat_num);
649   EXPECT_NE(ds, nullptr);
650 
651   // Create an iterator over the result of the above dataset
652   // This will trigger the creation of the Execution Tree and launch it.
653   std::shared_ptr<Iterator> iter = ds->CreateIterator();
654   EXPECT_NE(iter, nullptr);
655 
656   // Iterate the dataset and get each row
657   std::unordered_map<std::string, mindspore::MSTensor> row;
658   ASSERT_OK(iter->GetNextRow(&row));
659 
660   uint64_t i = 0;
661   while (row.size() != 0) {
662     i++;
663     auto image = row["image"];
664     MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
665     ASSERT_OK(iter->GetNextRow(&row));
666   }
667 
668   EXPECT_EQ(i, 4);
669 
670   // Manually terminate the pipeline
671   iter->Stop();
672 }
673 
674 TEST_F(MindDataTestCacheOp, DISABLED_TestCacheTextfileCApi) {
675   session_id_type env_session;
676   Status s = GetSessionFromEnv(&env_session);
677   EXPECT_EQ(s, Status::OK());
678 
679   std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
680   EXPECT_NE(some_cache, nullptr);
681 
682   // Create a TextFile Dataset, this file_path has 3 records in it
683   std::string file_path = datasets_root_path_ + "/testTextFileDataset/1.txt";
684 
685   // In this one, a num_samples=2 argument is given.
686   // In this case, a sequential sampler would be chosen with the same num_samples argument.
687   // The samples will be selected by the sequential sampler, not by the TextFile leaf node.
688   std::shared_ptr<Dataset> ds = TextFile({file_path}, 2, ShuffleMode::kGlobal, 1, 0, some_cache);
689   EXPECT_NE(ds, nullptr);
690 
691   // Create a Repeat operation on ds
692   int32_t repeat_num = 2;
693   ds = ds->Repeat(repeat_num);
694   EXPECT_NE(ds, nullptr);
695 
696   // Create an iterator over the result of the above dataset
697   // This will trigger the creation of the Execution Tree and launch it.
698   std::shared_ptr<Iterator> iter = ds->CreateIterator();
699   EXPECT_NE(iter, nullptr);
700 
701   // Iterate the dataset and get each row
702   std::unordered_map<std::string, mindspore::MSTensor> row;
703   ASSERT_OK(iter->GetNextRow(&row));
704 
705   uint64_t i = 0;
706   while (row.size() != 0) {
707     i++;
708     ASSERT_OK(iter->GetNextRow(&row));
709   }
710 
711   EXPECT_EQ(i, 4);
712 
713   // Manually terminate the pipeline
714   iter->Stop();
715 }
716 
717 TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCsvCApi) {
718   session_id_type env_session;
719   Status s = GetSessionFromEnv(&env_session);
720   EXPECT_EQ(s, Status::OK());
721 
722   std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
723   EXPECT_NE(some_cache, nullptr);
724 
725   // Create a CSV Dataset, this file_path has 3 records in it
726   std::string file_path = datasets_root_path_ + "/testCSV/1.csv";
727   std::vector<std::string> column_names = {"col1", "col2", "col3", "col4"};
728 
729   // In this one, a num_samples=2 argument is given.
730   // In this case, a sequential sampler would be chosen with the same num_samples argument.
731   // The samples will be selected by the sequential sampler, not by the CSV leaf node.
732   std::shared_ptr<Dataset> ds = CSV({file_path}, ',', {}, column_names, 2, ShuffleMode::kFalse, 1, 0, some_cache);
733   EXPECT_NE(ds, nullptr);
734 
735   // Create a Repeat operation on ds
736   int32_t repeat_num = 2;
737   ds = ds->Repeat(repeat_num);
738   EXPECT_NE(ds, nullptr);
739 
740   // Create an iterator over the result of the above dataset
741   // This will trigger the creation of the Execution Tree and launch it.
742   std::shared_ptr<Iterator> iter = ds->CreateIterator();
743   EXPECT_NE(iter, nullptr);
744 
745   // Iterate the dataset and get each row
746   std::unordered_map<std::string, mindspore::MSTensor> row;
747   ASSERT_OK(iter->GetNextRow(&row));
748 
749   uint64_t i = 0;
750   while (row.size() != 0) {
751     i++;
752     ASSERT_OK(iter->GetNextRow(&row));
753   }
754 
755   EXPECT_EQ(i, 4);
756 
757   // Manually terminate the pipeline
758   iter->Stop();
759 }
760 
761 TEST_F(MindDataTestCacheOp, DISABLED_TestCacheClueCApi) {
762   session_id_type env_session;
763   Status s = GetSessionFromEnv(&env_session);
764   EXPECT_EQ(s, Status::OK());
765 
766   std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
767   EXPECT_NE(some_cache, nullptr);
768 
769   // Create a CLUE Dataset, this file_path has 3 records in it
770   std::string file_path = datasets_root_path_ + "/testCLUE/afqmc/train.json";
771   std::string task = "AFQMC";
772   std::string usage = "train";
773 
774   // In this one, a num_samples=2 argument is given.
775   // In this case, a sequential sampler would be chosen with the same num_samples argument.
776   // The samples will be selected by the sequential sampler, not by the CLUE leaf node.
777   std::shared_ptr<Dataset> ds = CLUE({file_path}, task, usage, 2, ShuffleMode::kFalse, 1, 0, some_cache);
778   EXPECT_NE(ds, nullptr);
779 
780   // Create a Repeat operation on ds
781   int32_t repeat_num = 2;
782   ds = ds->Repeat(repeat_num);
783   EXPECT_NE(ds, nullptr);
784 
785   // Create an iterator over the result of the above dataset
786   // This will trigger the creation of the Execution Tree and launch it.
787   std::shared_ptr<Iterator> iter = ds->CreateIterator();
788   EXPECT_NE(iter, nullptr);
789 
790   // Iterate the dataset and get each row
791   std::unordered_map<std::string, mindspore::MSTensor> row;
792   ASSERT_OK(iter->GetNextRow(&row));
793 
794   uint64_t i = 0;
795   while (row.size() != 0) {
796     i++;
797     ASSERT_OK(iter->GetNextRow(&row));
798   }
799 
800   EXPECT_EQ(i, 4);
801 
802   // Manually terminate the pipeline
803   iter->Stop();
804 }
805 
806 TEST_F(MindDataTestCacheOp, DISABLED_TestCApiCacheShare1) {
807   session_id_type env_session;
808   Status s = GetSessionFromEnv(&env_session);
809   EXPECT_EQ(s, Status::OK());
810 
811   std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
812   EXPECT_NE(some_cache, nullptr);
813 
814   // Create an ImageFolder Dataset, this folder_path only has 2 images in it
815   std::string folder_path = datasets_root_path_ + "/testImageNetData/train/";
816   std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, false, std::make_shared<RandomSampler>(), {}, {}, some_cache);
817   EXPECT_NE(ds1, nullptr);
818   std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, false, std::make_shared<RandomSampler>(), {}, {}, some_cache);
819   EXPECT_NE(ds2, nullptr);
820 
821   // Create and launch the Execution Tree for ds1
822   std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
823   EXPECT_NE(iter1, nullptr);
824   // Iterate the dataset and get each row
825   std::unordered_map<std::string, mindspore::MSTensor> row;
826   ASSERT_OK(iter1->GetNextRow(&row));
827 
828   uint64_t i = 0;
829   while (row.size() != 0) {
830     i++;
831     auto image = row["image"];
832     MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
833     ASSERT_OK(iter1->GetNextRow(&row));
834   }
835   EXPECT_EQ(i, 2);
836   // Manually terminate the pipeline
837   iter1->Stop();
838 
839   // Create and launch the Execution Tree for ds2
840   std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
841   EXPECT_NE(iter2, nullptr);
842   // Iterate the dataset and get each row
843   ASSERT_OK(iter2->GetNextRow(&row));
844 
845   i = 0;
846   while (row.size() != 0) {
847     i++;
848     auto image = row["image"];
849     MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
850     ASSERT_OK(iter2->GetNextRow(&row));
851   }
852   EXPECT_EQ(i, 2);
853 
854   // Manually terminate the pipeline
855   iter2->Stop();
856 }
857 
858 TEST_F(MindDataTestCacheOp, DISABLED_TestCApiCacheShare2) {
859   session_id_type env_session;
860   Status s = GetSessionFromEnv(&env_session);
861   EXPECT_EQ(s, Status::OK());
862 
863   std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
864   EXPECT_NE(some_cache, nullptr);
865 
866   // Create an ImageFolder Dataset, this folder_path only has 2 images in it
867   std::string folder_path = datasets_root_path_ + "/testImageNetData/train/";
868   // The first pipeline is ImageFolder with RandomSampler, the second pipeline is ImageFolder with SequentialSampler
869   // Since sampler does not influence the data in the source, these two pipelines can share a common cache.
870   std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(), {}, {}, some_cache);
871   EXPECT_NE(ds1, nullptr);
872   std::shared_ptr<Dataset> ds2 =
873     ImageFolder(folder_path, true, std::make_shared<SequentialSampler>(), {}, {}, some_cache);
874   EXPECT_NE(ds2, nullptr);
875 
876   // Create and launch the Execution Tree for ds1
877   std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
878   EXPECT_NE(iter1, nullptr);
879   // Iterate the dataset and get each row
880   std::unordered_map<std::string, mindspore::MSTensor> row;
881   ASSERT_OK(iter1->GetNextRow(&row));
882 
883   uint64_t i = 0;
884   while (row.size() != 0) {
885     i++;
886     auto image = row["image"];
887     ASSERT_OK(iter1->GetNextRow(&row));
888   }
889   EXPECT_EQ(i, 2);
890   // Manually terminate the pipeline
891   iter1->Stop();
892 
893   // Create and launch the Execution Tree for ds2
894   std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
895   EXPECT_NE(iter2, nullptr);
896   // Iterate the dataset and get each row
897   ASSERT_OK(iter2->GetNextRow(&row));
898 
899   i = 0;
900   while (row.size() != 0) {
901     i++;
902     auto image = row["image"];
903     ASSERT_OK(iter2->GetNextRow(&row));
904   }
905   EXPECT_EQ(i, 2);
906 
907   // Manually terminate the pipeline
908   iter2->Stop();
909 }
910 
911 TEST_F(MindDataTestCacheOp, DISABLED_TestCApiCacheShareFailure1) {
912   session_id_type env_session;
913   Status s = GetSessionFromEnv(&env_session);
914   EXPECT_EQ(s, Status::OK());
915 
916   std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false);
917   EXPECT_NE(some_cache, nullptr);
918 
919   // Create an ImageFolder Dataset, this folder_path only has 2 images in it
920   std::string folder_path = datasets_root_path_ + "/testImageNetData/train/";
921   std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(), {}, {}, some_cache);
922   EXPECT_NE(ds1, nullptr);
923   std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, false, std::make_shared<RandomSampler>(), {}, {}, some_cache);
924   EXPECT_NE(ds2, nullptr);
925 
926   // Create and launch the Execution Tree for ds1
927   std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
928   EXPECT_NE(iter1, nullptr);
929   // Iterate the dataset and get each row
930   std::unordered_map<std::string, mindspore::MSTensor> row;
931   ASSERT_OK(iter1->GetNextRow(&row));
932 
933   uint64_t i = 0;
934   while (row.size() != 0) {
935     i++;
936     auto image = row["image"];
937     ASSERT_OK(iter1->GetNextRow(&row));
938   }
939   EXPECT_EQ(i, 2);
940   // Manually terminate the pipeline
941   iter1->Stop();
942 
943   // Re-use a cache for the second pipeline would fail
944   std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
945   EXPECT_EQ(iter2, nullptr);
946 }
947