1 /**
2 * Copyright 2020-2021 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include "common/common.h"
17 #include "minddata/dataset/include/dataset/datasets.h"
18 #include "minddata/dataset/core/tensor.h"
19
20 using namespace mindspore::dataset;
21 using mindspore::dataset::Tensor;
22
23 class MindDataTestPipeline : public UT::DatasetOpTesting {
24 protected:
25 };
26
// Builds a MindData dataset from a single shard path passed as a string, iterates it to the end and
// expects 20 rows in total.
TEST_F(MindDataTestPipeline, TestMindDataSuccess1) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMindDataSuccess1 with string file pattern.";

  // Create a MindData Dataset
  // Pass one mindrecord shard file to parse dataset info, and search for other mindrecord files with same dataset info,
  // thus all records in imagenet.mindrecord0 ~ imagenet.mindrecord3 will be read
  std::string file_path = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
  std::shared_ptr<Dataset> ds = MindData(file_path);
  // Fatal check: ds is dereferenced on the next statement, so a null dataset must stop the test here.
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: iter is dereferenced below.
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // An empty row marks the end of the data; count every non-empty row.
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["file_name"];
    TEST_MS_LOG_MSTENSOR(INFO, "Tensor image file name: ", image);

    ASSERT_OK(iter->GetNextRow(&row));
  }

  // Each *.mindrecord file has 5 rows, so there are 20 rows in total(imagenet.mindrecord0 ~ imagenet.mindrecord3)
  EXPECT_EQ(i, 20);

  // Manually terminate the pipeline
  iter->Stop();
}
61
// Checks the dataset-level getters of a MindData dataset built from a single shard path:
// GetDatasetSize() is expected to be 20 and GetColumnNames() to be {"data", "file_name", "label"}.
TEST_F(MindDataTestPipeline, TestMindDataGetters) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMindDataGetters with string file pattern.";

  // Create a MindData Dataset
  // Pass one mindrecord shard file to parse dataset info, and search for other mindrecord files with same dataset info,
  // thus all records in imagenet.mindrecord0 ~ imagenet.mindrecord3 will be read
  std::string file_path = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
  std::shared_ptr<Dataset> ds = MindData(file_path);
  // Fatal check: the getters below dereference ds.
  ASSERT_NE(ds, nullptr);

  std::vector<std::string> column_names = {"data", "file_name", "label"};

  EXPECT_EQ(ds->GetDatasetSize(), 20);
  EXPECT_EQ(ds->GetColumnNames(), column_names);
}
77
// Builds a MindData dataset from a one-element file list, iterates it to the end and expects 5 rows.
TEST_F(MindDataTestPipeline, TestMindDataSuccess2) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMindDataSuccess2 with a vector of single mindrecord file.";

  // Create a MindData Dataset
  // Pass a list of mindrecord file name, files in list will be read directly but not search for related files
  std::string file_path1 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
  std::shared_ptr<Dataset> ds = MindData(std::vector<std::string>{file_path1});
  // Fatal check: ds is dereferenced on the next statement.
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: iter is dereferenced below.
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // An empty row marks the end of the data; count every non-empty row.
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["file_name"];
    TEST_MS_LOG_MSTENSOR(INFO, "Tensor image file name: ", image);

    ASSERT_OK(iter->GetNextRow(&row));
  }

  // Only records in imagenet.mindrecord0 are read
  EXPECT_EQ(i, 5);

  // Manually terminate the pipeline
  iter->Stop();
}
111
// Builds a MindData dataset from a two-element file list, iterates it to the end and expects 10 rows.
TEST_F(MindDataTestPipeline, TestMindDataSuccess3) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMindDataSuccess3 with a vector of multiple mindrecord files.";

  // Create a MindData Dataset
  // Pass a list of mindrecord file name, files in list will be read directly but not search for related files
  std::string file_path1 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
  std::string file_path2 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord1";
  std::vector<std::string> file_list = {file_path1, file_path2};
  std::shared_ptr<Dataset> ds = MindData(file_list);
  // Fatal check: ds is dereferenced on the next statement.
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: iter is dereferenced below.
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // An empty row marks the end of the data; count every non-empty row.
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["file_name"];
    TEST_MS_LOG_MSTENSOR(INFO, "Tensor image file name: ", image);

    ASSERT_OK(iter->GetNextRow(&row));
  }

  // Only records in imagenet.mindrecord0 and imagenet.mindrecord1 are read
  EXPECT_EQ(i, 10);

  // Manually terminate the pipeline
  iter->Stop();
}
147
// Builds a MindData dataset from shard imagenet.mindrecord1 restricted to the "label" column,
// iterates it to the end and expects 20 rows.
TEST_F(MindDataTestPipeline, TestMindDataSuccess4) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMindDataSuccess4 with specified column.";

  // Create a MindData Dataset
  // Pass one mindrecord shard file to parse dataset info, and search for other mindrecord files with same dataset info,
  // thus all records in imagenet.mindrecord0 ~ imagenet.mindrecord3 will be read
  std::string file_path1 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord1";
  std::shared_ptr<Dataset> ds = MindData(file_path1, {"label"});
  // Fatal check: ds is dereferenced on the next statement.
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: iter is dereferenced below.
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // An empty row marks the end of the data; count every non-empty row.
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto label = row["label"];
    TEST_MS_LOG_MSTENSOR(INFO, "Tensor label: ", label);
    ASSERT_OK(iter->GetNextRow(&row));
  }

  // Shard file "mindrecord0/mindrecord1/mindrecord2/mindrecord3" have same dataset info,
  // thus if input file is any of them, all records in imagenet.mindrecord* will be read
  EXPECT_EQ(i, 20);

  // Manually terminate the pipeline
  iter->Stop();
}
182
// Builds a MindData dataset with SequentialSampler(0, 3) and checks that exactly 3 rows are produced
// and that the "label" tensor of each row equals an int64 scalar 0.
TEST_F(MindDataTestPipeline, TestMindDataSuccess5) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMindDataSuccess5 with specified sampler.";

  // Create a MindData Dataset
  // Pass one mindrecord shard file to parse dataset info, and search for other mindrecord files with same dataset info,
  // thus all records in imagenet.mindrecord0 ~ imagenet.mindrecord3 will be read
  std::string file_path1 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
  std::shared_ptr<Dataset> ds = MindData(file_path1, {}, std::make_shared<SequentialSampler>(0, 3));
  // Fatal check: ds is dereferenced on the next statement.
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: iter is dereferenced below.
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // Reference tensor every label is compared against: an int64 scalar holding 0.
  std::shared_ptr<Tensor> de_expect_item;
  ASSERT_OK(Tensor::CreateScalar(static_cast<int64_t>(0), &de_expect_item));
  mindspore::MSTensor expect_item = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expect_item));

  // An empty row marks the end of the data; count every non-empty row.
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto label = row["label"];

    EXPECT_MSTENSOR_EQ(label, expect_item);

    ASSERT_OK(iter->GetNextRow(&row));
  }

  // SequentialSampler will return 3 samples
  EXPECT_EQ(i, 3);

  // Manually terminate the pipeline
  iter->Stop();
}
222
// Builds six MindData datasets over the same single shard, each with a different sampler whose
// num_samples (10) exceeds what the sampler can deliver, then iterates each one and compares the
// number of rows produced with the per-sampler expectation {5, 5, 2, 3, 3, 2}.
TEST_F(MindDataTestPipeline, TestMindDataSuccess6) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMindDataSuccess6 with num_samples out of range.";

  // Create a MindData Dataset
  // Pass a list of mindrecord file name, files in list will be read directly but not search for related files
  // imagenet.mindrecord0 file has 5 rows, but num_samples is larger than 5
  std::string file_path1 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
  std::vector<std::string> file_list = {file_path1};

  // Every dataset pointer is dereferenced in the loop below, so each null check is fatal.

  // Check sequential sampler, output number is 5
  std::shared_ptr<Dataset> ds1 = MindData(file_list, {}, std::make_shared<SequentialSampler>(0, 10));
  ASSERT_NE(ds1, nullptr);

  // Check random sampler, output number is 5, same rows with file
  std::shared_ptr<Dataset> ds2 = MindData(file_list, {}, std::make_shared<RandomSampler>(false, 10));
  ASSERT_NE(ds2, nullptr);

  // Check pk sampler, output number is 2, get 2 samples with label 0
  std::shared_ptr<Dataset> ds3 = MindData(file_list, {}, std::make_shared<PKSampler>(2, false, 10));
  ASSERT_NE(ds3, nullptr);

  // Check distributed sampler, output number is 3, get 3 samples in shard 0
  std::shared_ptr<Dataset> ds4 = MindData(file_list, {}, std::make_shared<DistributedSampler>(2, 0, false, 10));
  ASSERT_NE(ds4, nullptr);

  // NOTE(review): the two samplers below are passed as raw pointers obtained from `new` and are never
  // deleted in this test. Whether MindData takes ownership is not visible from here - confirm,
  // otherwise these allocations leak.

  // Check subset random sampler, output number is 3, get 3 samples with indices 0, 1, 2
  std::shared_ptr<Dataset> ds5 = MindData(file_list, {}, new SubsetRandomSampler({0, 1, 2}, 10));
  ASSERT_NE(ds5, nullptr);

  // Check subset sampler, output number is 2, get 2 samples with indices 1, 2
  std::shared_ptr<Dataset> ds6 = MindData(file_list, {}, new SubsetSampler({1, 2}, 10));
  ASSERT_NE(ds6, nullptr);

  std::vector<std::shared_ptr<Dataset>> ds = {ds1, ds2, ds3, ds4, ds5, ds6};
  // Same unsigned type as the row counter so the comparison below does not mix signedness.
  std::vector<uint64_t> expected_samples = {5, 5, 2, 3, 3, 2};
  ASSERT_EQ(ds.size(), expected_samples.size());

  for (size_t i = 0; i < ds.size(); i++) {
    // Create an iterator over the result of the above dataset
    // This will trigger the creation of the Execution Tree and launch it.
    std::shared_ptr<Iterator> iter = ds[i]->CreateIterator();
    // Fatal check: iter is dereferenced below.
    ASSERT_NE(iter, nullptr);

    // Iterate the dataset and get each row
    std::unordered_map<std::string, mindspore::MSTensor> row;
    ASSERT_OK(iter->GetNextRow(&row));

    // An empty row marks the end of the data; count every non-empty row.
    uint64_t j = 0;
    while (row.size() != 0) {
      j++;
      auto label = row["label"];
      TEST_MS_LOG_MSTENSOR(INFO, "Tensor label: ", label);
      ASSERT_OK(iter->GetNextRow(&row));
    }
    EXPECT_EQ(j, expected_samples[i]);

    // Manually terminate the pipeline
    iter->Stop();
  }
}
281
// Builds a MindData dataset with a padded sample (num_padded = 4), skips the first 5 rows and
// checks that the remaining 4 rows all carry the padded label 999.
TEST_F(MindDataTestPipeline, TestMindDataSuccess7) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMindDataSuccess7 with padded sample.";

  // Create pad sample for MindDataset
  auto pad = nlohmann::json::object();
  pad["file_name"] = "does_not_exist.jpg";
  pad["label"] = 999;

  // Create a MindData Dataset
  // Pass a list of mindrecord file name, files in list will be read directly but not search for related files
  std::string file_path1 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
  std::vector<std::string> file_list = {file_path1};
  std::shared_ptr<Dataset> ds =
    MindData(file_list, {"file_name", "label"}, std::make_shared<SequentialSampler>(), &pad, 4);
  // Fatal check: ds is dereferenced on the next statement.
  ASSERT_NE(ds, nullptr);

  // Create a Skip operation on ds, skip original data in mindrecord and get padded samples
  ds = ds->Skip(5);
  // Fatal check: ds is dereferenced below.
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: iter is dereferenced below.
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // Reference tensor every label is compared against: an int64 scalar holding the padded label 999.
  std::shared_ptr<Tensor> de_expect_item;
  ASSERT_OK(Tensor::CreateScalar(static_cast<int64_t>(999), &de_expect_item));
  mindspore::MSTensor expect_item = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expect_item));

  // An empty row marks the end of the data; count every non-empty row.
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["file_name"];
    auto label = row["label"];
    TEST_MS_LOG_MSTENSOR(INFO, "Tensor image file name: ", image);
    TEST_MS_LOG_MSTENSOR(INFO, "Tensor label: ", label);

    EXPECT_MSTENSOR_EQ(label, expect_item);

    ASSERT_OK(iter->GetNextRow(&row));
  }

  // Only the 4 padded samples are left after the skip
  EXPECT_EQ(i, 4);

  // Manually terminate the pipeline
  iter->Stop();
}
333
// Builds a MindData dataset with a padded sample (num_padded = 4), checks its getters (output types,
// output shapes, dataset size, repeat count, column names), then applies Skip(5) and Repeat(2) and
// checks that 8 rows are produced, all carrying the padded label 999.
TEST_F(MindDataTestPipeline, TestMindDataSuccess8) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMindDataSuccess8 with padded sample.";

  // Create pad sample for MindDataset
  auto pad = nlohmann::json::object();
  pad["file_name"] = "does_not_exist.jpg";
  pad["label"] = 999;

  // Create a MindData Dataset
  // Pass a list of mindrecord file name, files in list will be read directly but not search for related files
  std::string file_path1 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
  std::vector<std::string> file_list = {file_path1};
  std::shared_ptr<Dataset> ds =
    MindData(file_list, {"file_name", "label"}, std::make_shared<SequentialSampler>(), &pad, 4);
  // Fatal check: ds is dereferenced by the getters below.
  ASSERT_NE(ds, nullptr);

  std::vector<mindspore::dataset::DataType> types = ToDETypes(ds->GetOutputTypes());
  std::vector<mindspore::dataset::TensorShape> shapes = ToTensorShapeVec(ds->GetOutputShapes());
  std::vector<std::string> column_names = {"file_name", "label"};
  // Size checks are fatal because elements 0 and 1 are indexed right afterwards.
  ASSERT_EQ(types.size(), 2);
  EXPECT_EQ(types[0].ToString(), "string");
  EXPECT_EQ(types[1].ToString(), "int64");
  ASSERT_EQ(shapes.size(), 2);
  EXPECT_EQ(shapes[0].ToString(), "<>");
  EXPECT_EQ(shapes[1].ToString(), "<>");
  EXPECT_EQ(ds->GetDatasetSize(), 5);
  EXPECT_EQ(ds->GetRepeatCount(), 1);
  EXPECT_EQ(ds->GetColumnNames(), column_names);

  // Create a Skip operation on ds, skip original data in mindrecord and get padded samples
  ds = ds->Skip(5);
  // Fatal check: ds is dereferenced below.
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  // Fatal check: ds is dereferenced below.
  ASSERT_NE(ds, nullptr);
  EXPECT_EQ(ds->GetRepeatCount(), 2);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: iter is dereferenced below.
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // Reference tensor every label is compared against: an int64 scalar holding the padded label 999.
  std::shared_ptr<Tensor> de_expect_item;
  ASSERT_OK(Tensor::CreateScalar(static_cast<int64_t>(999), &de_expect_item));
  mindspore::MSTensor expect_item = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expect_item));

  // An empty row marks the end of the data; count every non-empty row.
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["file_name"];
    auto label = row["label"];
    TEST_MS_LOG_MSTENSOR(INFO, "Tensor image file name: ", image);
    TEST_MS_LOG_MSTENSOR(INFO, "Tensor label: ", label);

    EXPECT_MSTENSOR_EQ(label, expect_item);

    ASSERT_OK(iter->GetNextRow(&row));
  }

  // 4 padded samples repeated twice
  EXPECT_EQ(i, 8);

  // Manually terminate the pipeline
  iter->Stop();
}
404
// Builds two identical padded MindData datasets (num_padded = 4), applies Skip(5) to both, repeats
// the first twice and the second three times, projects both onto "label", concatenates them and
// checks that 20 rows are produced, all carrying the padded label 999.
TEST_F(MindDataTestPipeline, TestMindDataSuccess9) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMindDataSuccess9 with padded sample.";

  // Create pad sample for MindDataset
  auto pad = nlohmann::json::object();
  pad["file_name"] = "does_not_exist.jpg";
  pad["label"] = 999;

  // Every dataset pointer below is dereferenced by the following operation, so each null check is fatal.

  // Create a MindData Dataset
  // Pass a list of mindrecord file name, files in list will be read directly but not search for related files
  std::string file_path1 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
  std::vector<std::string> file_list = {file_path1};
  std::shared_ptr<Dataset> ds1 =
    MindData(file_list, {"file_name", "label"}, std::make_shared<SequentialSampler>(), &pad, 4);
  ASSERT_NE(ds1, nullptr);
  ds1 = ds1->Skip(5);
  ASSERT_NE(ds1, nullptr);

  std::shared_ptr<Dataset> ds2 =
    MindData(file_list, {"file_name", "label"}, std::make_shared<SequentialSampler>(), &pad, 4);
  ASSERT_NE(ds2, nullptr);
  ds2 = ds2->Skip(5);
  ASSERT_NE(ds2, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds1 = ds1->Repeat(repeat_num);
  ASSERT_NE(ds1, nullptr);
  repeat_num = 3;
  ds2 = ds2->Repeat(repeat_num);
  ASSERT_NE(ds2, nullptr);

  // Create a Project operation on ds
  std::vector<std::string> column_project = {"label"};
  ds1 = ds1->Project(column_project);
  ASSERT_NE(ds1, nullptr);
  ds2 = ds2->Project(column_project);
  ASSERT_NE(ds2, nullptr);

  // Create a Concat operation on the ds
  ds1 = ds1->Concat({ds2});
  ASSERT_NE(ds1, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds1->CreateIterator();
  // Fatal check: iter is dereferenced below.
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // Reference tensor every label is compared against: an int64 scalar holding the padded label 999.
  std::shared_ptr<Tensor> de_expect_item;
  ASSERT_OK(Tensor::CreateScalar(static_cast<int64_t>(999), &de_expect_item));
  mindspore::MSTensor expect_item = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expect_item));

  // An empty row marks the end of the data; count every non-empty row.
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto label = row["label"];
    TEST_MS_LOG_MSTENSOR(INFO, "Tensor label: ", label);

    EXPECT_MSTENSOR_EQ(label, expect_item);

    ASSERT_OK(iter->GetNextRow(&row));
  }

  // 4 padded samples x 2 repeats from ds1 plus 4 padded samples x 3 repeats from ds2
  EXPECT_EQ(i, 20);

  // Manually terminate the pipeline
  iter->Stop();
}
477
// Negative test: for three invalid file inputs (a non-existent file passed as a string, the same
// non-existent file passed in a list, and a path without a shard suffix) the dataset object is still
// created, but CreateIterator() is expected to return nullptr.
TEST_F(MindDataTestPipeline, TestMindDataFail1) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMindDataFail1 with incorrect file path.";

  // Each dataset pointer is dereferenced by CreateIterator(), so the null checks on them are fatal.

  // Create a MindData Dataset with incorrect pattern
  std::string file_path1 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/apple.mindrecord0";
  std::shared_ptr<Dataset> ds1 = MindData(file_path1);
  ASSERT_NE(ds1, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
  // Expect failure: invalid MindData input with incorrect pattern
  EXPECT_EQ(iter1, nullptr);

  // Create a MindData Dataset with incorrect file path
  std::string file_path2 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/apple.mindrecord0";
  std::vector<std::string> file_list = {file_path2};
  std::shared_ptr<Dataset> ds2 = MindData(file_list);
  ASSERT_NE(ds2, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
  // Expect failure: invalid MindData input with incorrect file path
  EXPECT_EQ(iter2, nullptr);

  // Create a MindData Dataset with incorrect file path
  // ATTENTION: file_path3 is not a pattern to search for ".mindrecord*"
  std::string file_path3 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord";
  std::shared_ptr<Dataset> ds3 = MindData(file_path3);
  ASSERT_NE(ds3, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter3 = ds3->CreateIterator();
  // Expect failure: invalid MindData input with incorrect file path
  EXPECT_EQ(iter3, nullptr);
}
513
// Negative test: for three invalid column lists (an empty column name, a duplicated column name and
// a column that does not exist) the dataset object is still created, but CreateIterator() is
// expected to return nullptr.
TEST_F(MindDataTestPipeline, TestMindDataFail2) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMindDataFail2 with incorrect column name.";

  // Each dataset pointer is dereferenced by CreateIterator(), so the null checks on them are fatal.

  // Create a MindData Dataset with incorrect column name
  std::string file_path1 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
  std::shared_ptr<Dataset> ds1 = MindData(file_path1, {""});
  ASSERT_NE(ds1, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
  // Expect failure: invalid MindData input with incorrect column name
  EXPECT_EQ(iter1, nullptr);

  // Create a MindData Dataset with duplicate column name
  std::string file_path2 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
  std::shared_ptr<Dataset> ds2 = MindData(file_path2, {"label", "label"});
  ASSERT_NE(ds2, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
  // Expect failure: invalid MindData input with duplicate column name
  EXPECT_EQ(iter2, nullptr);

  // Create a MindData Dataset with unexpected column name
  std::string file_path3 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
  std::vector<std::string> file_list = {file_path3};
  std::shared_ptr<Dataset> ds3 = MindData(file_list, {"label", "not_exist"});
  ASSERT_NE(ds3, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter3 = ds3->CreateIterator();
  // Expect failure: invalid MindData input with unexpected column name
  EXPECT_EQ(iter3, nullptr);
}
549
// Negative test: with a WeightedRandomSampler and with a null sampler the dataset object is still
// created, but CreateIterator() is expected to return nullptr.
TEST_F(MindDataTestPipeline, TestMindDataFail3) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMindDataFail3 with unsupported sampler.";

  // Each dataset pointer is dereferenced by CreateIterator(), so the null checks on them are fatal.

  // Create a MindData Dataset with unsupported sampler
  // NOTE(review): the sampler is passed as a raw pointer obtained from `new` and is never deleted in
  // this test. Whether MindData takes ownership is not visible from here - confirm, otherwise it leaks.
  std::string file_path1 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
  std::shared_ptr<Dataset> ds1 = MindData(file_path1, {}, new WeightedRandomSampler({1, 1, 1, 1}));
  ASSERT_NE(ds1, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
  // Expect failure: invalid MindData input with unsupported sampler
  EXPECT_EQ(iter1, nullptr);

  // Create a MindData Dataset with incorrect sampler
  std::string file_path2 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
  std::shared_ptr<Dataset> ds2 = MindData(file_path2, {}, nullptr);
  ASSERT_NE(ds2, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
  // Expect failure: invalid MindData input with incorrect sampler
  EXPECT_EQ(iter2, nullptr);
}
573
// Negative test for the padded-sample arguments of MindData. Four invalid combinations are tried
// (num_padded without padded_sample, negative num_padded, padded_sample without columns_list, and a
// padded_sample whose keys do not match columns_list); in each case the dataset object is still
// created, but CreateIterator() is expected to return nullptr.
TEST_F(MindDataTestPipeline, TestMindDataFail4) {
  // The log line names this test, as the other tests in this file do.
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMindDataFail4 with padded sample.";

  // Each dataset pointer is dereferenced by CreateIterator(), so the null checks on them are fatal.

  // Create a MindData Dataset
  std::string file_path1 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
  std::shared_ptr<Dataset> ds1 = MindData(file_path1, {}, std::make_shared<RandomSampler>(), nullptr, 2);
  ASSERT_NE(ds1, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
  // Expect failure: invalid MindData input, num_padded is specified but padded_sample is not
  EXPECT_EQ(iter1, nullptr);

  // Create padded sample for MindDataset
  auto pad = nlohmann::json::object();
  pad["file_name"] = "1.jpg";
  pad["label"] = 123456;

  // Create a MindData Dataset
  std::string file_path2 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
  std::shared_ptr<Dataset> ds2 = MindData(file_path2, {"label"}, std::make_shared<RandomSampler>(), &pad, -2);
  ASSERT_NE(ds2, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
  // Expect failure: invalid MindData input, num_padded is not greater than or equal to zero
  EXPECT_EQ(iter2, nullptr);

  // Create a MindData Dataset
  std::string file_path3 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
  std::shared_ptr<Dataset> ds3 = MindData(file_path3, {}, std::make_shared<RandomSampler>(), &pad, 1);
  ASSERT_NE(ds3, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter3 = ds3->CreateIterator();
  // Expect failure: invalid MindData input, padded_sample is specified but requires columns_list as well
  EXPECT_EQ(iter3, nullptr);

  // Create padded sample with unmatched column name
  auto pad2 = nlohmann::json::object();
  pad2["a"] = "1.jpg";
  pad2["b"] = 123456;

  // Create a MindData Dataset
  std::string file_path4 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
  std::shared_ptr<Dataset> ds4 =
    MindData(file_path4, {"file_name", "label"}, std::make_shared<RandomSampler>(), &pad2, 1);
  ASSERT_NE(ds4, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter4 = ds4->CreateIterator();
  // Expect failure: invalid MindData input, columns_list does not match any column in padded_sample
  EXPECT_EQ(iter4, nullptr);
}
628