• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020-2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "common/common.h"
17 #include "minddata/dataset/include/dataset/datasets.h"
18 #include "minddata/dataset/core/tensor.h"
19 
20 using namespace mindspore::dataset;
21 using mindspore::dataset::Tensor;
22 
23 class MindDataTestPipeline : public UT::DatasetOpTesting {
24  protected:
25 };
26 
// Builds a MindData dataset from a single shard path given as a string and checks that
// iterating it yields 20 rows in total.
TEST_F(MindDataTestPipeline, TestMindDataSuccess1) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMindDataSuccess1 with string file pattern.";

  // Create a MindData Dataset
  // Pass one mindrecord shard file to parse dataset info, and search for other mindrecord files with same dataset info,
  // thus all records in imagenet.mindrecord0 ~ imagenet.mindrecord3 will be read
  std::string file_path = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
  std::shared_ptr<Dataset> ds = MindData(file_path);
  // Fatal check: ds is dereferenced right below, so continuing with a null pointer would crash the binary.
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check for the same reason: iter is used immediately afterwards.
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // An empty row is the end-of-data signal used by this loop.
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["file_name"];
    TEST_MS_LOG_MSTENSOR(INFO, "Tensor image file name: ", image);

    ASSERT_OK(iter->GetNextRow(&row));
  }

  // Each *.mindrecord file has 5 rows, so there are 20 rows in total(imagenet.mindrecord0 ~ imagenet.mindrecord3)
  EXPECT_EQ(i, 20);

  // Manually terminate the pipeline
  iter->Stop();
}
61 
// Checks the dataset-level getters (size and column names) of a MindData dataset
// without creating an iterator.
TEST_F(MindDataTestPipeline, TestMindDataGetters) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMindDataGetters with string file pattern.";

  // Create a MindData Dataset
  // Pass one mindrecord shard file to parse dataset info, and search for other mindrecord files with same dataset info,
  // thus all records in imagenet.mindrecord0 ~ imagenet.mindrecord3 will be read
  std::string file_path = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
  std::shared_ptr<Dataset> ds = MindData(file_path);
  // Fatal check: the getters below dereference ds.
  ASSERT_NE(ds, nullptr);

  std::vector<std::string> column_names = {"data", "file_name", "label"};

  EXPECT_EQ(ds->GetDatasetSize(), 20);
  EXPECT_EQ(ds->GetColumnNames(), column_names);
}
77 
// Builds a MindData dataset from a one-element file list and checks that only the
// 5 rows of that single file are produced.
TEST_F(MindDataTestPipeline, TestMindDataSuccess2) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMindDataSuccess2 with a vector of single mindrecord file.";

  // Create a MindData Dataset
  // Pass a list of mindrecord file name, files in list will be read directly but not search for related files
  std::string file_path1 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
  std::shared_ptr<Dataset> ds = MindData(std::vector<std::string>{file_path1});
  // Fatal check: ds is dereferenced right below.
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: iter is used immediately afterwards.
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // An empty row is the end-of-data signal used by this loop.
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["file_name"];
    TEST_MS_LOG_MSTENSOR(INFO, "Tensor image file name: ", image);

    ASSERT_OK(iter->GetNextRow(&row));
  }

  // Only records in imagenet.mindrecord0 are read
  EXPECT_EQ(i, 5);

  // Manually terminate the pipeline
  iter->Stop();
}
111 
// Builds a MindData dataset from a two-element file list and checks that exactly the
// 10 rows of those two files are produced.
TEST_F(MindDataTestPipeline, TestMindDataSuccess3) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMindDataSuccess3 with a vector of multiple mindrecord files.";

  // Create a MindData Dataset
  // Pass a list of mindrecord file name, files in list will be read directly but not search for related files
  std::string file_path1 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
  std::string file_path2 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord1";
  std::vector<std::string> file_list = {file_path1, file_path2};
  std::shared_ptr<Dataset> ds = MindData(file_list);
  // Fatal check: ds is dereferenced right below.
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: iter is used immediately afterwards.
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // An empty row is the end-of-data signal used by this loop.
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["file_name"];
    TEST_MS_LOG_MSTENSOR(INFO, "Tensor image file name: ", image);

    ASSERT_OK(iter->GetNextRow(&row));
  }

  // Only records in imagenet.mindrecord0 and imagenet.mindrecord1 are read
  EXPECT_EQ(i, 10);

  // Manually terminate the pipeline
  iter->Stop();
}
147 
// Builds a MindData dataset from the shard path imagenet.mindrecord1 with only the
// "label" column selected and checks that 20 rows are produced.
TEST_F(MindDataTestPipeline, TestMindDataSuccess4) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMindDataSuccess4 with specified column.";

  // Create a MindData Dataset
  // Pass one mindrecord shard file to parse dataset info, and search for other mindrecord files with same dataset info,
  // thus all records in imagenet.mindrecord0 ~ imagenet.mindrecord3 will be read
  std::string file_path1 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord1";
  std::shared_ptr<Dataset> ds = MindData(file_path1, {"label"});
  // Fatal check: ds is dereferenced right below.
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: iter is used immediately afterwards.
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // An empty row is the end-of-data signal used by this loop.
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto label = row["label"];
    TEST_MS_LOG_MSTENSOR(INFO, "Tensor label: ", label);
    ASSERT_OK(iter->GetNextRow(&row));
  }

  // Shard file "mindrecord0/mindrecord1/mindrecord2/mindrecord3" have same dataset info,
  // thus if input file is any of them, all records in imagenet.mindrecord* will be read
  EXPECT_EQ(i, 20);

  // Manually terminate the pipeline
  iter->Stop();
}
182 
// Builds a MindData dataset with SequentialSampler(0, 3) and checks that 3 rows are
// produced, each with a "label" equal to the int64 scalar 0.
TEST_F(MindDataTestPipeline, TestMindDataSuccess5) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMindDataSuccess5 with specified sampler.";

  // Create a MindData Dataset
  // Pass one mindrecord shard file to parse dataset info, and search for other mindrecord files with same dataset info,
  // thus all records in imagenet.mindrecord0 ~ imagenet.mindrecord3 will be read
  std::string file_path1 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
  std::shared_ptr<Dataset> ds = MindData(file_path1, {}, std::make_shared<SequentialSampler>(0, 3));
  // Fatal check: ds is dereferenced right below.
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: iter is used immediately afterwards.
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // Reference tensor that every returned label is compared against
  std::shared_ptr<Tensor> de_expect_item;
  ASSERT_OK(Tensor::CreateScalar(static_cast<int64_t>(0), &de_expect_item));
  mindspore::MSTensor expect_item = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expect_item));

  // An empty row is the end-of-data signal used by this loop.
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto label = row["label"];

    EXPECT_MSTENSOR_EQ(label, expect_item);

    ASSERT_OK(iter->GetNextRow(&row));
  }

  // SequentialSampler will return 3 samples
  EXPECT_EQ(i, 3);

  // Manually terminate the pipeline
  iter->Stop();
}
222 
TEST_F(MindDataTestPipeline,TestMindDataSuccess6)223 TEST_F(MindDataTestPipeline, TestMindDataSuccess6) {
224   MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMindDataSuccess6 with num_samples out of range.";
225 
226   // Create a MindData Dataset
227   // Pass a list of mindrecord file name, files in list will be read directly but not search for related files
228   // imagenet.mindrecord0 file has 5 rows, but num_samples is larger than 5
229   std::string file_path1 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
230   std::vector<std::string> file_list = {file_path1};
231 
232   // Check sequential sampler, output number is 5
233   std::shared_ptr<Dataset> ds1 = MindData(file_list, {}, std::make_shared<SequentialSampler>(0, 10));
234   EXPECT_NE(ds1, nullptr);
235 
236   // Check random sampler, output number is 5, same rows with file
237   std::shared_ptr<Dataset> ds2 = MindData(file_list, {}, std::make_shared<RandomSampler>(false, 10));
238   EXPECT_NE(ds2, nullptr);
239 
240   // Check pk sampler, output number is 2, get 2 samples with label 0
241   std::shared_ptr<Dataset> ds3 = MindData(file_list, {}, std::make_shared<PKSampler>(2, false, 10));
242   EXPECT_NE(ds3, nullptr);
243 
244   // Check distributed sampler, output number is 3, get 3 samples in shard 0
245   std::shared_ptr<Dataset> ds4 = MindData(file_list, {}, std::make_shared<DistributedSampler>(2, 0, false, 10));
246   EXPECT_NE(ds4, nullptr);
247 
248   // Check distributed sampler get 3 samples with indice 0, 1 ,2
249   std::shared_ptr<Dataset> ds5 = MindData(file_list, {}, new SubsetRandomSampler({0, 1, 2}, 10));
250   EXPECT_NE(ds5, nullptr);
251 
252   std::shared_ptr<Dataset> ds6 = MindData(file_list, {}, new SubsetSampler({1, 2}, 10));
253   EXPECT_NE(ds5, nullptr);
254 
255   std::vector<std::shared_ptr<Dataset>> ds = {ds1, ds2, ds3, ds4, ds5, ds6};
256   std::vector<int32_t> expected_samples = {5, 5, 2, 3, 3, 2};
257 
258   for (int32_t i = 0; i < ds.size(); i++) {
259     // Create an iterator over the result of the above dataset
260     // This will trigger the creation of the Execution Tree and launch it.
261     std::shared_ptr<Iterator> iter = ds[i]->CreateIterator();
262     EXPECT_NE(iter, nullptr);
263 
264     // Iterate the dataset and get each row
265     std::unordered_map<std::string, mindspore::MSTensor> row;
266     ASSERT_OK(iter->GetNextRow(&row));
267 
268     uint64_t j = 0;
269     while (row.size() != 0) {
270       j++;
271       auto label = row["label"];
272       TEST_MS_LOG_MSTENSOR(INFO, "Tensor label: ", label);
273       ASSERT_OK(iter->GetNextRow(&row));
274     }
275     EXPECT_EQ(j, expected_samples[i]);
276 
277     // Manually terminate the pipeline
278     iter->Stop();
279   }
280 }
281 
// Builds a MindData dataset with 4 padded samples appended, skips the first 5 rows and
// checks that the remaining 4 rows all carry the padded label 999.
TEST_F(MindDataTestPipeline, TestMindDataSuccess7) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMindDataSuccess7 with padded sample.";

  // Create pad sample for MindDataset
  auto pad = nlohmann::json::object();
  pad["file_name"] = "does_not_exist.jpg";
  pad["label"] = 999;

  // Create a MindData Dataset
  // Pass a list of mindrecord file name, files in list will be read directly but not search for related files
  std::string file_path1 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
  std::vector<std::string> file_list = {file_path1};
  std::shared_ptr<Dataset> ds =
    MindData(file_list, {"file_name", "label"}, std::make_shared<SequentialSampler>(), &pad, 4);
  // Fatal check: ds is dereferenced right below.
  ASSERT_NE(ds, nullptr);

  // Create a Skip operation on ds, skip original data in mindrecord and get padded samples
  ds = ds->Skip(5);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: iter is used immediately afterwards.
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // Reference tensor holding the padded label value
  std::shared_ptr<Tensor> de_expect_item;
  ASSERT_OK(Tensor::CreateScalar(static_cast<int64_t>(999), &de_expect_item));
  mindspore::MSTensor expect_item = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expect_item));

  // An empty row is the end-of-data signal used by this loop.
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["file_name"];
    auto label = row["label"];
    TEST_MS_LOG_MSTENSOR(INFO, "Tensor image file name: ", image);
    TEST_MS_LOG_MSTENSOR(INFO, "Tensor label: ", label);

    EXPECT_MSTENSOR_EQ(label, expect_item);

    ASSERT_OK(iter->GetNextRow(&row));
  }

  // Only the 4 padded samples remain after the skip
  EXPECT_EQ(i, 4);

  // Manually terminate the pipeline
  iter->Stop();
}
333 
TEST_F(MindDataTestPipeline,TestMindDataSuccess8)334 TEST_F(MindDataTestPipeline, TestMindDataSuccess8) {
335   MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMindDataSuccess8 with padded sample.";
336 
337   // Create pad sample for MindDataset
338   auto pad = nlohmann::json::object();
339   pad["file_name"] = "does_not_exist.jpg";
340   pad["label"] = 999;
341 
342   // Create a MindData Dataset
343   // Pass a list of mindrecord file name, files in list will be read directly but not search for related files
344   std::string file_path1 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
345   std::vector<std::string> file_list = {file_path1};
346   std::shared_ptr<Dataset> ds =
347     MindData(file_list, {"file_name", "label"}, std::make_shared<SequentialSampler>(), &pad, 4);
348   EXPECT_NE(ds, nullptr);
349 
350   std::vector<mindspore::dataset::DataType> types = ToDETypes(ds->GetOutputTypes());
351   std::vector<mindspore::dataset::TensorShape> shapes = ToTensorShapeVec(ds->GetOutputShapes());
352   std::vector<std::string> column_names = {"file_name", "label"};
353   EXPECT_EQ(types.size(), 2);
354   EXPECT_EQ(types[0].ToString(), "string");
355   EXPECT_EQ(types[1].ToString(), "int64");
356   EXPECT_EQ(shapes.size(), 2);
357   EXPECT_EQ(shapes[0].ToString(), "<>");
358   EXPECT_EQ(shapes[1].ToString(), "<>");
359   EXPECT_EQ(ds->GetDatasetSize(), 5);
360   EXPECT_EQ(ds->GetRepeatCount(), 1);
361   EXPECT_EQ(ds->GetColumnNames(), column_names);
362 
363   // Create a Skip operation on ds, skip original data in mindrecord and get padded samples
364   ds = ds->Skip(5);
365   EXPECT_NE(ds, nullptr);
366 
367   // Create a Repeat operation on ds
368   int32_t repeat_num = 2;
369   ds = ds->Repeat(repeat_num);
370   EXPECT_NE(ds, nullptr);
371   EXPECT_EQ(ds->GetRepeatCount(), 2);
372 
373   // Create an iterator over the result of the above dataset
374   // This will trigger the creation of the Execution Tree and launch it.
375   std::shared_ptr<Iterator> iter = ds->CreateIterator();
376   EXPECT_NE(iter, nullptr);
377 
378   // Iterate the dataset and get each row
379   std::unordered_map<std::string, mindspore::MSTensor> row;
380   ASSERT_OK(iter->GetNextRow(&row));
381 
382   std::shared_ptr<Tensor> de_expect_item;
383   ASSERT_OK(Tensor::CreateScalar((int64_t)999, &de_expect_item));
384   mindspore::MSTensor expect_item = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expect_item));
385 
386   uint64_t i = 0;
387   while (row.size() != 0) {
388     i++;
389     auto image = row["file_name"];
390     auto label = row["label"];
391     TEST_MS_LOG_MSTENSOR(INFO, "Tensor image file name: ", image);
392     TEST_MS_LOG_MSTENSOR(INFO, "Tensor label: ", label);
393 
394     EXPECT_MSTENSOR_EQ(label, expect_item);
395 
396     ASSERT_OK(iter->GetNextRow(&row));
397   }
398 
399   EXPECT_EQ(i, 8);
400 
401   // Manually terminate the pipeline
402   iter->Stop();
403 }
404 
TEST_F(MindDataTestPipeline,TestMindDataSuccess9)405 TEST_F(MindDataTestPipeline, TestMindDataSuccess9) {
406   MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMindDataSuccess9 with padded sample.";
407 
408   // Create pad sample for MindDataset
409   auto pad = nlohmann::json::object();
410   pad["file_name"] = "does_not_exist.jpg";
411   pad["label"] = 999;
412 
413   // Create a MindData Dataset
414   // Pass a list of mindrecord file name, files in list will be read directly but not search for related files
415   std::string file_path1 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
416   std::vector<std::string> file_list = {file_path1};
417   std::shared_ptr<Dataset> ds1 =
418     MindData(file_list, {"file_name", "label"}, std::make_shared<SequentialSampler>(), &pad, 4);
419   EXPECT_NE(ds1, nullptr);
420   ds1 = ds1->Skip(5);
421   EXPECT_NE(ds1, nullptr);
422 
423   std::shared_ptr<Dataset> ds2 =
424     MindData(file_list, {"file_name", "label"}, std::make_shared<SequentialSampler>(), &pad, 4);
425   EXPECT_NE(ds2, nullptr);
426   ds2 = ds2->Skip(5);
427   EXPECT_NE(ds2, nullptr);
428 
429   // Create a Repeat operation on ds
430   int32_t repeat_num = 2;
431   ds1 = ds1->Repeat(repeat_num);
432   EXPECT_NE(ds1, nullptr);
433   repeat_num = 3;
434   ds2 = ds2->Repeat(repeat_num);
435   EXPECT_NE(ds2, nullptr);
436 
437   // Create a Project operation on ds
438   std::vector<std::string> column_project = {"label"};
439   ds1 = ds1->Project(column_project);
440   EXPECT_NE(ds1, nullptr);
441   ds2 = ds2->Project(column_project);
442   EXPECT_NE(ds2, nullptr);
443 
444   // Create a Concat operation on the ds
445   ds1 = ds1->Concat({ds2});
446   EXPECT_NE(ds1, nullptr);
447 
448   // Create an iterator over the result of the above dataset
449   // This will trigger the creation of the Execution Tree and launch it.
450   std::shared_ptr<Iterator> iter = ds1->CreateIterator();
451   EXPECT_NE(iter, nullptr);
452 
453   // Iterate the dataset and get each row
454   std::unordered_map<std::string, mindspore::MSTensor> row;
455   ASSERT_OK(iter->GetNextRow(&row));
456 
457   std::shared_ptr<Tensor> de_expect_item;
458   ASSERT_OK(Tensor::CreateScalar((int64_t)999, &de_expect_item));
459   mindspore::MSTensor expect_item = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expect_item));
460 
461   uint64_t i = 0;
462   while (row.size() != 0) {
463     i++;
464     auto label = row["label"];
465     TEST_MS_LOG_MSTENSOR(INFO, "Tensor label: ", label);
466 
467     EXPECT_MSTENSOR_EQ(label, expect_item);
468 
469     ASSERT_OK(iter->GetNextRow(&row));
470   }
471 
472   EXPECT_EQ(i, 20);
473 
474   // Manually terminate the pipeline
475   iter->Stop();
476 }
477 
// Checks that CreateIterator() returns nullptr for three MindData datasets built from
// file paths that do not name an existing mindrecord shard.
TEST_F(MindDataTestPipeline, TestMindDataFail1) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMindDataFail1 with incorrect file path.";

  // Create a MindData Dataset with incorrect pattern
  std::string file_path1 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/apple.mindrecord0";
  std::shared_ptr<Dataset> ds1 = MindData(file_path1);
  // Fatal check: ds1 is dereferenced right below.
  ASSERT_NE(ds1, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
  // Expect failure: invalid MindData input with incorrect pattern
  EXPECT_EQ(iter1, nullptr);

  // Create a MindData Dataset with incorrect file path
  std::string file_path2 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/apple.mindrecord0";
  std::vector<std::string> file_list = {file_path2};
  std::shared_ptr<Dataset> ds2 = MindData(file_list);
  // Fatal check: ds2 is dereferenced right below.
  ASSERT_NE(ds2, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
  // Expect failure: invalid MindData input with incorrect file path
  EXPECT_EQ(iter2, nullptr);

  // Create a MindData Dataset with incorrect file path
  // ATTENTION: file_path3 is not a pattern to search for ".mindrecord*"
  std::string file_path3 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord";
  std::shared_ptr<Dataset> ds3 = MindData(file_path3);
  // Fatal check: ds3 is dereferenced right below.
  ASSERT_NE(ds3, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter3 = ds3->CreateIterator();
  // Expect failure: invalid MindData input with incorrect file path
  EXPECT_EQ(iter3, nullptr);
}
513 
// Checks that CreateIterator() returns nullptr when the column list is invalid:
// an empty name, a duplicated name, or a name that is expected not to exist.
TEST_F(MindDataTestPipeline, TestMindDataFail2) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMindDataFail2 with incorrect column name.";

  // Create a MindData Dataset with incorrect column name
  std::string file_path1 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
  std::shared_ptr<Dataset> ds1 = MindData(file_path1, {""});
  // Fatal check: ds1 is dereferenced right below.
  ASSERT_NE(ds1, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
  // Expect failure: invalid MindData input with incorrect column name
  EXPECT_EQ(iter1, nullptr);

  // Create a MindData Dataset with duplicate column name
  std::string file_path2 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
  std::shared_ptr<Dataset> ds2 = MindData(file_path2, {"label", "label"});
  // Fatal check: ds2 is dereferenced right below.
  ASSERT_NE(ds2, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
  // Expect failure: invalid MindData input with duplicate column name
  EXPECT_EQ(iter2, nullptr);

  // Create a MindData Dataset with unexpected column name
  std::string file_path3 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
  std::vector<std::string> file_list = {file_path3};
  std::shared_ptr<Dataset> ds3 = MindData(file_list, {"label", "not_exist"});
  // Fatal check: ds3 is dereferenced right below.
  ASSERT_NE(ds3, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter3 = ds3->CreateIterator();
  // Expect failure: invalid MindData input with unexpected column name
  EXPECT_EQ(iter3, nullptr);
}
549 
// Checks that CreateIterator() returns nullptr when MindData is given a
// WeightedRandomSampler or a null sampler.
TEST_F(MindDataTestPipeline, TestMindDataFail3) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMindDataFail3 with unsupported sampler.";

  // Create a MindData Dataset with unsupported sampler
  // NOTE(review): the sampler is passed as a raw `new` pointer; ownership is not visible from this file -
  // confirm the raw-pointer overload does not leak it.
  std::string file_path1 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
  std::shared_ptr<Dataset> ds1 = MindData(file_path1, {}, new WeightedRandomSampler({1, 1, 1, 1}));
  // Fatal check: ds1 is dereferenced right below.
  ASSERT_NE(ds1, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
  // Expect failure: invalid MindData input with unsupported sampler
  EXPECT_EQ(iter1, nullptr);

  // Create a MindData Dataset with incorrect sampler
  std::string file_path2 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
  std::shared_ptr<Dataset> ds2 = MindData(file_path2, {}, nullptr);
  // Fatal check: ds2 is dereferenced right below.
  ASSERT_NE(ds2, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
  // Expect failure: invalid MindData input with incorrect sampler
  EXPECT_EQ(iter2, nullptr);
}
573 
// Checks that CreateIterator() returns nullptr for four invalid padded-sample configurations:
// num_padded without a sample, negative num_padded, a padded sample without a column list,
// and a padded sample whose keys do not match the column list.
TEST_F(MindDataTestPipeline, TestMindDataFail4) {
  // The message now names this test, matching the other tests in the file.
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMindDataFail4 with padded sample.";

  // Create a MindData Dataset
  std::string file_path1 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
  std::shared_ptr<Dataset> ds1 = MindData(file_path1, {}, std::make_shared<RandomSampler>(), nullptr, 2);
  // Fatal check: ds1 is dereferenced right below.
  ASSERT_NE(ds1, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
  // Expect failure: invalid MindData input, num_padded is specified but padded_sample is not
  EXPECT_EQ(iter1, nullptr);

  // Create padded sample for MindDataset
  auto pad = nlohmann::json::object();
  pad["file_name"] = "1.jpg";
  pad["label"] = 123456;

  // Create a MindData Dataset
  std::string file_path2 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
  std::shared_ptr<Dataset> ds2 = MindData(file_path2, {"label"}, std::make_shared<RandomSampler>(), &pad, -2);
  // Fatal check: ds2 is dereferenced right below.
  ASSERT_NE(ds2, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
  // Expect failure: invalid MindData input, num_padded is not greater than or equal to zero
  EXPECT_EQ(iter2, nullptr);

  // Create a MindData Dataset
  std::string file_path3 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
  std::shared_ptr<Dataset> ds3 = MindData(file_path3, {}, std::make_shared<RandomSampler>(), &pad, 1);
  // Fatal check: ds3 is dereferenced right below.
  ASSERT_NE(ds3, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter3 = ds3->CreateIterator();
  // Expect failure: invalid MindData input, padded_sample is specified but requires columns_list as well
  EXPECT_EQ(iter3, nullptr);

  // Create padded sample with unmatched column name
  auto pad2 = nlohmann::json::object();
  pad2["a"] = "1.jpg";
  pad2["b"] = 123456;

  // Create a MindData Dataset
  std::string file_path4 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
  std::shared_ptr<Dataset> ds4 =
    MindData(file_path4, {"file_name", "label"}, std::make_shared<RandomSampler>(), &pad2, 1);
  // Fatal check: ds4 is dereferenced right below.
  ASSERT_NE(ds4, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter4 = ds4->CreateIterator();
  // Expect failure: invalid MindData input, columns_list does not match any column in padded_sample
  EXPECT_EQ(iter4, nullptr);
}
628