1 /**
2 * Copyright 2020-2021 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include "common/common.h"
17 #include "include/api/types.h"
18 #include "minddata/dataset/core/tensor_row.h"
19 #include "minddata/dataset/engine/ir/datasetops/dataset_node.h"
20 #include "minddata/dataset/include/dataset/datasets.h"
21 #include "minddata/dataset/include/dataset/vision.h"
22
23 using namespace mindspore::dataset;
24 using mindspore::dataset::Tensor;
25
26 class MindDataTestPipeline : public UT::DatasetOpTesting {
27 protected:
28 };
29
VecToRow(const MSTensorVec & v)30 TensorRow VecToRow(const MSTensorVec &v) {
31 TensorRow row;
32 for (const mindspore::MSTensor &t : v) {
33 std::shared_ptr<Tensor> rt;
34 (void)Tensor::CreateFromMemory(TensorShape(t.Shape()), MSTypeToDEType(static_cast<mindspore::TypeId>(t.DataType())),
35 (const uchar *)(t.Data().get()), t.DataSize(), &rt);
36 row.emplace_back(rt);
37 }
38 return row;
39 }
RowToVec(const TensorRow & v)40 MSTensorVec RowToVec(const TensorRow &v) {
41 MSTensorVec rv; // std::make_shared<DETensor>(de_tensor)
42 std::transform(v.begin(), v.end(), std::back_inserter(rv), [](std::shared_ptr<Tensor> t) -> mindspore::MSTensor {
43 return mindspore::MSTensor(std::make_shared<DETensor>(t));
44 });
45 return rv;
46 }
47
BucketBatchTestFunction(MSTensorVec input)48 MSTensorVec BucketBatchTestFunction(MSTensorVec input) {
49 mindspore::dataset::TensorRow output;
50 std::shared_ptr<Tensor> out;
51 (void)Tensor::CreateEmpty(mindspore::dataset::TensorShape({1}),
52 mindspore::dataset::DataType(mindspore::dataset::DataType::Type::DE_INT32), &out);
53 (void)out->SetItemAt({0}, 2);
54 output.push_back(out);
55 return RowToVec(output);
56 }
57
Predicate1(MSTensorVec in)58 MSTensorVec Predicate1(MSTensorVec in) {
59 // Return true if input is equal to 3
60 uint64_t input_value;
61 TensorRow input = VecToRow(in);
62 (void)input.at(0)->GetItemAt(&input_value, {0});
63 bool result = (input_value == 3);
64
65 // Convert from boolean to TensorRow
66 TensorRow output;
67 std::shared_ptr<Tensor> out;
68 (void)Tensor::CreateEmpty(mindspore::dataset::TensorShape({}),
69 mindspore::dataset::DataType(mindspore::dataset::DataType::Type::DE_BOOL), &out);
70 (void)out->SetItemAt({}, result);
71 output.push_back(out);
72
73 return RowToVec(output);
74 }
75
Predicate2(MSTensorVec in)76 MSTensorVec Predicate2(MSTensorVec in) {
77 // Return true if label is more than 1
78 // The index of label in input is 1
79 uint64_t input_value;
80 TensorRow input = VecToRow(in);
81 (void)input.at(1)->GetItemAt(&input_value, {0});
82 bool result = (input_value > 1);
83
84 // Convert from boolean to TensorRow
85 TensorRow output;
86 std::shared_ptr<Tensor> out;
87 (void)Tensor::CreateEmpty(mindspore::dataset::TensorShape({}),
88 mindspore::dataset::DataType(mindspore::dataset::DataType::Type::DE_BOOL), &out);
89 (void)out->SetItemAt({}, result);
90 output.push_back(out);
91
92 return RowToVec(output);
93 }
94
// Mnist with a 10-sample RandomSampler, repeated twice and batched by 2: the iterator is expected
// to produce 10 rows.
TEST_F(MindDataTestPipeline, TestBatchAndRepeat) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestBatchAndRepeat.";

  // Create a Mnist Dataset
  std::string folder_path = datasets_root_path_ + "/testMnistData/";
  std::shared_ptr<Dataset> ds = Mnist(folder_path, "all", std::make_shared<RandomSampler>(false, 10));
  // Fatal assertion: ds is dereferenced right after this check.
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 2;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate over the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 10);

  // Manually terminate the pipeline
  iter->Stop();
}
135
// BucketBatchByLength called with only column names, bucket boundaries and bucket batch sizes
// (all other arguments left at their defaults): the iterator is expected to produce 2 rows.
TEST_F(MindDataTestPipeline, TestBucketBatchByLengthSuccess1) {
  // Calling with default values
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestBucketBatchByLengthSuccess1.";

  // Create a Mnist Dataset
  std::string folder_path = datasets_root_path_ + "/testMnistData/";
  std::shared_ptr<Dataset> ds = Mnist(folder_path, "all", std::make_shared<RandomSampler>(false, 10));
  // Fatal assertion: ds is dereferenced right after this check.
  ASSERT_NE(ds, nullptr);

  // Create a BucketBatchByLength operation on ds
  ds = ds->BucketBatchByLength({"image"}, {1, 2, 3}, {4, 5, 6, 7});
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate over the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }
  // The 10 sampled rows are expected to come out as 2 batches
  EXPECT_EQ(i, 2);

  // Manually terminate the pipeline
  iter->Stop();
}
171
// BucketBatchByLength called with every argument given explicitly, including the custom
// element-length function BucketBatchTestFunction and an empty pad_info map: the iterator is
// expected to produce 3 rows.
TEST_F(MindDataTestPipeline, TestBucketBatchByLengthSuccess2) {
  // Calling with non-default values
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestBucketBatchByLengthSuccess2.";

  // Create a Mnist Dataset
  std::string folder_path = datasets_root_path_ + "/testMnistData/";
  std::shared_ptr<Dataset> ds = Mnist(folder_path, "all", std::make_shared<RandomSampler>(false, 10));
  // Fatal assertion: ds is dereferenced right after this check.
  ASSERT_NE(ds, nullptr);

  // Create a BucketBatchByLength operation on ds
  std::map<std::string, std::pair<std::vector<int64_t>, mindspore::MSTensor>> pad_info = {};
  ds = ds->BucketBatchByLength({"image"}, {1, 2}, {1, 2, 3}, &BucketBatchTestFunction, pad_info, true, true);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate over the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }
  // With 2 boundaries, 3 buckets are created
  EXPECT_EQ(i, 3);

  // Manually terminate the pipeline
  iter->Stop();
}
208
// BucketBatchByLength with an empty bucket_boundaries list: creating the iterator is expected to
// fail and return nullptr.
TEST_F(MindDataTestPipeline, TestBucketBatchByLengthFail1) {
  // Empty bucket_boundaries
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestBucketBatchByLengthFail1.";

  // Create a Mnist Dataset
  std::string folder_path = datasets_root_path_ + "/testMnistData/";
  std::shared_ptr<Dataset> ds = Mnist(folder_path, "all", std::make_shared<RandomSampler>(false, 10));
  // Fatal assertion: ds is dereferenced right after this check.
  ASSERT_NE(ds, nullptr);

  // Create a BucketBatchByLength operation on ds
  ds = ds->BucketBatchByLength({"image"}, {}, {1});
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Expect failure: invalid Op input
  EXPECT_EQ(iter, nullptr);
}
228
// BucketBatchByLength with an empty bucket_batch_sizes list: creating the iterator is expected to
// fail and return nullptr.
TEST_F(MindDataTestPipeline, TestBucketBatchByLengthFail2) {
  // Empty bucket_batch_sizes
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestBucketBatchByLengthFail2.";

  // Create a Mnist Dataset
  std::string folder_path = datasets_root_path_ + "/testMnistData/";
  std::shared_ptr<Dataset> ds = Mnist(folder_path, "all", std::make_shared<RandomSampler>(false, 10));
  // Fatal assertion: ds is dereferenced right after this check.
  ASSERT_NE(ds, nullptr);

  // Create a BucketBatchByLength operation on ds
  ds = ds->BucketBatchByLength({"image"}, {1}, {});
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Expect failure: invalid Op input
  EXPECT_EQ(iter, nullptr);
}
248
// BucketBatchByLength with a negative bucket boundary: creating the iterator is expected to fail
// and return nullptr.
TEST_F(MindDataTestPipeline, TestBucketBatchByLengthFail3) {
  // Negative boundaries
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestBucketBatchByLengthFail3.";

  // Create a Mnist Dataset
  std::string folder_path = datasets_root_path_ + "/testMnistData/";
  std::shared_ptr<Dataset> ds = Mnist(folder_path, "all", std::make_shared<RandomSampler>(false, 10));
  // Fatal assertion: ds is dereferenced right after this check.
  ASSERT_NE(ds, nullptr);

  // Create a BucketBatchByLength operation on ds
  ds = ds->BucketBatchByLength({"image"}, {-1, 1}, {1, 2, 3});
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Expect failure: invalid Op input
  EXPECT_EQ(iter, nullptr);
}
268
// BucketBatchByLength with bucket boundaries that are not strictly increasing ({2, 2}): creating
// the iterator is expected to fail and return nullptr.
TEST_F(MindDataTestPipeline, TestBucketBatchByLengthFail4) {
  // Boundaries not strictly increasing
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestBucketBatchByLengthFail4.";

  // Create a Mnist Dataset
  std::string folder_path = datasets_root_path_ + "/testMnistData/";
  std::shared_ptr<Dataset> ds = Mnist(folder_path, "all", std::make_shared<RandomSampler>(false, 10));
  // Fatal assertion: ds is dereferenced right after this check.
  ASSERT_NE(ds, nullptr);

  // Create a BucketBatchByLength operation on ds
  ds = ds->BucketBatchByLength({"image"}, {2, 2}, {1, 2, 3});
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Expect failure: invalid Op input
  EXPECT_EQ(iter, nullptr);
}
288
// BucketBatchByLength with 2 boundaries but only 2 bucket batch sizes (the passing tests in this
// file supply one more batch size than boundaries): creating the iterator is expected to fail and
// return nullptr.
TEST_F(MindDataTestPipeline, TestBucketBatchByLengthFail5) {
  // Incorrect size of bucket_batch_size
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestBucketBatchByLengthFail5.";

  // Create a Mnist Dataset
  std::string folder_path = datasets_root_path_ + "/testMnistData/";
  std::shared_ptr<Dataset> ds = Mnist(folder_path, "all", std::make_shared<RandomSampler>(false, 10));
  // Fatal assertion: ds is dereferenced right after this check.
  ASSERT_NE(ds, nullptr);

  // Create a BucketBatchByLength operation on ds
  ds = ds->BucketBatchByLength({"image"}, {1, 2}, {1, 2});
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Expect failure: invalid Op input
  EXPECT_EQ(iter, nullptr);
}
308
// BucketBatchByLength with a negative entry in bucket_batch_sizes: creating the iterator is
// expected to fail and return nullptr.
TEST_F(MindDataTestPipeline, TestBucketBatchByLengthFail6) {
  // Negative bucket_batch_size
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestBucketBatchByLengthFail6.";

  // Create a Mnist Dataset
  std::string folder_path = datasets_root_path_ + "/testMnistData/";
  std::shared_ptr<Dataset> ds = Mnist(folder_path, "all", std::make_shared<RandomSampler>(false, 10));
  // Fatal assertion: ds is dereferenced right after this check.
  ASSERT_NE(ds, nullptr);

  // Create a BucketBatchByLength operation on ds
  ds = ds->BucketBatchByLength({"image"}, {1, 2}, {1, -2, 3});
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Expect failure: invalid Op input
  EXPECT_EQ(iter, nullptr);
}
327
// BucketBatchByLength with two column names and no element_length_function: creating the iterator
// is expected to fail and return nullptr.
TEST_F(MindDataTestPipeline, TestBucketBatchByLengthFail7) {
  // This should fail because element_length_function is not specified and column_names has more than 1 element.
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestBucketBatchByLengthFail7.";

  // Create a Mnist Dataset
  std::string folder_path = datasets_root_path_ + "/testMnistData/";
  std::shared_ptr<Dataset> ds = Mnist(folder_path, "all", std::make_shared<RandomSampler>(false, 10));
  // Fatal assertion: ds is dereferenced right after this check.
  ASSERT_NE(ds, nullptr);

  // Create a BucketBatchByLength operation on ds
  ds = ds->BucketBatchByLength({"image", "label"}, {1, 2}, {1, 2, 3});
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Expect failure: invalid Op input
  EXPECT_EQ(iter, nullptr);
}
347
// Concat of two ImageFolder datasets where the second one has its columns renamed to
// {"col1", "col2"}: creating the iterator is expected to fail and return nullptr.
TEST_F(MindDataTestPipeline, TestConcatFail1) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConcatFail1.";
  // This case is expected to fail because the input column names of concatenated datasets are not the same

  // Create an ImageFolder Dataset
  // Column names: {"image", "label"}
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal assertions: each pointer is dereferenced after its check.
  ASSERT_NE(ds, nullptr);
  std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Check the dataset that was just created (ds2), not ds again.
  ASSERT_NE(ds2, nullptr);

  // Create a Rename operation on ds2
  ds2 = ds2->Rename({"image", "label"}, {"col1", "col2"});
  ASSERT_NE(ds2, nullptr);

  // Create a Concat operation on the ds
  // Column names of the datasets to concat do not match
  ds = ds->Concat({ds2});
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_EQ(iter, nullptr);
}
379
// Concat called with an empty list of datasets: creating the iterator is expected to fail and
// return nullptr.
TEST_F(MindDataTestPipeline, TestConcatFail2) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConcatFail2.";
  // This case is expected to fail because the input dataset is empty.

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal assertion: ds is dereferenced right after this check.
  ASSERT_NE(ds, nullptr);

  // Create a Concat operation on the ds
  // Input dataset to concat is empty
  ds = ds->Concat({});
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Expect failure: invalid Op input
  EXPECT_EQ(iter, nullptr);
}
399
// Concat called with a list containing a nullptr dataset: creating the iterator is expected to
// fail and return nullptr.
TEST_F(MindDataTestPipeline, TestConcatFail3) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConcatFail3.";
  // This case is expected to fail because the input dataset is nullptr.

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal assertion: ds is dereferenced right after this check.
  ASSERT_NE(ds, nullptr);

  // Create a Concat operation on the ds
  // Input dataset to concat is null
  ds = ds->Concat({nullptr});
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Expect failure: invalid Op input
  EXPECT_EQ(iter, nullptr);
}
419
// Concat through the "+" operator with nullptr as the right-hand operand: creating the iterator is
// expected to fail and return nullptr.
TEST_F(MindDataTestPipeline, TestConcatFail4) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConcatFail4.";
  // This case is expected to fail because the input dataset is nullptr.

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  ASSERT_NE(ds, nullptr);

  // Create a Concat operation on the ds
  // Input dataset to concat is null
  ds = ds + nullptr;
  // Fatal assertion: ds is dereferenced right after this check.
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Expect failure: invalid Op input
  EXPECT_EQ(iter, nullptr);
}
439
// Concat of a dataset with itself (ds1 + ds1 + ds2): creating the iterator is expected to fail and
// return nullptr.
TEST_F(MindDataTestPipeline, TestConcatFail5) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConcatFail5.";
  // This case is expected to fail because the dataset is concatenated with itself, which gives the
  // ProjectNode two parent nodes

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal assertions: each pointer is dereferenced after its check.
  ASSERT_NE(ds1, nullptr);

  std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  ASSERT_NE(ds2, nullptr);

  // Create a Project operation on each dataset
  ds1 = ds1->Project({"image"});
  ASSERT_NE(ds1, nullptr);
  ds2 = ds2->Project({"image"});
  ASSERT_NE(ds2, nullptr);

  // Create a Concat operation on the ds
  // Input dataset is the dataset itself
  ds1 = ds1 + ds1 + ds2;
  ASSERT_NE(ds1, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds1->CreateIterator();
  // Expect failure: The data pipeline is not a tree
  EXPECT_EQ(iter, nullptr);
}
468
// Concat of an ImageFolder dataset (10 samples) and a Cifar10 dataset (9 samples), both projected
// to the "image" column and then batched by 1: the iterator is expected to produce 19 rows.
TEST_F(MindDataTestPipeline, TestConcatSuccess) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConcatSuccess.";

  // Create an ImageFolder Dataset
  // Column names: {"image", "label"}
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal assertions: each pointer is dereferenced after its check.
  ASSERT_NE(ds, nullptr);

  // Create a Cifar10 Dataset
  // Column names: {"image", "label"}
  folder_path = datasets_root_path_ + "/testCifar10Data/";
  std::shared_ptr<Dataset> ds2 = Cifar10(folder_path, "all", std::make_shared<RandomSampler>(false, 9));
  ASSERT_NE(ds2, nullptr);

  // Create a Project operation on each dataset
  ds = ds->Project({"image"});
  ASSERT_NE(ds, nullptr);
  ds2 = ds2->Project({"image"});
  // Check the dataset that was just reassigned (ds2), not ds again.
  ASSERT_NE(ds2, nullptr);

  // Create a Concat operation on the ds
  ds = ds->Concat({ds2});
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate over the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 19);
  // Manually terminate the pipeline
  iter->Stop();
}
519
// GetDatasetSize on the Concat of an ImageFolder dataset (10 samples) and a Cifar10 dataset
// (9 samples), both projected to the "image" column: the expected size is 19.
TEST_F(MindDataTestPipeline, TestConcatGetDatasetSize) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConcatGetDatasetSize.";

  // Create an ImageFolder Dataset
  // Column names: {"image", "label"}
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal assertions: each pointer is dereferenced after its check.
  ASSERT_NE(ds, nullptr);

  // Create a Cifar10 Dataset
  // Column names: {"image", "label"}
  folder_path = datasets_root_path_ + "/testCifar10Data/";
  std::shared_ptr<Dataset> ds2 = Cifar10(folder_path, "all", std::make_shared<RandomSampler>(false, 9));
  ASSERT_NE(ds2, nullptr);

  // Create a Project operation on each dataset
  ds = ds->Project({"image"});
  ASSERT_NE(ds, nullptr);
  ds2 = ds2->Project({"image"});
  // Check the dataset that was just reassigned (ds2), not ds again.
  ASSERT_NE(ds2, nullptr);

  // Create a Concat operation on the ds
  ds = ds->Concat({ds2});
  ASSERT_NE(ds, nullptr);

  EXPECT_EQ(ds->GetDatasetSize(), 19);
}
547
// Same pipeline as TestConcatSuccess, but the two datasets are concatenated with the "+" operator
// instead of Concat(): the iterator is expected to produce 19 rows.
TEST_F(MindDataTestPipeline, TestConcatSuccess2) {
  // Test "+" operator to concat two datasets
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConcatSuccess2.";

  // Create an ImageFolder Dataset
  // Column names: {"image", "label"}
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal assertions: each pointer is dereferenced after its check.
  ASSERT_NE(ds, nullptr);

  // Create a Cifar10 Dataset
  // Column names: {"image", "label"}
  folder_path = datasets_root_path_ + "/testCifar10Data/";
  std::shared_ptr<Dataset> ds2 = Cifar10(folder_path, "all", std::make_shared<RandomSampler>(false, 9));
  ASSERT_NE(ds2, nullptr);

  // Create a Project operation on each dataset
  ds = ds->Project({"image"});
  ASSERT_NE(ds, nullptr);
  ds2 = ds2->Project({"image"});
  // Check the dataset that was just reassigned (ds2), not ds again.
  ASSERT_NE(ds2, nullptr);

  // Create a Concat operation on the ds
  ds = ds + ds2;
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate over the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 19);
  // Manually terminate the pipeline
  iter->Stop();
}
599
// Filter with Predicate1 on the "label" column of a TFRecord dataset that has first been mapped
// through Decode and Resize: exactly one row, with label 3, is expected to pass the filter.
TEST_F(MindDataTestPipeline, TestFilterSuccess1) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFilterSuccess1.";
  // Test basic filter api with specific predicate to judge if label is equal to 3

  // Create a TFRecord Dataset
  std::string data_file = datasets_root_path_ + "/test_tf_file_3_images/train-0000-of-0001.data";
  std::string schema_file = datasets_root_path_ + "/test_tf_file_3_images/datasetSchema.json";
  std::shared_ptr<Dataset> ds = TFRecord({data_file}, schema_file, {"image", "label"}, 0, ShuffleMode::kFalse);
  // Fatal assertion: ds is dereferenced further down.
  ASSERT_NE(ds, nullptr);

  // Create objects for the tensor ops
  std::shared_ptr<TensorTransform> decode_op = std::make_shared<vision::Decode>(true);
  EXPECT_NE(decode_op, nullptr);

  std::shared_ptr<TensorTransform> resize_op(new vision::Resize({64, 64}));
  EXPECT_NE(resize_op, nullptr);

  // Create a Map operation on ds
  ds = ds->Map({decode_op, resize_op});
  ASSERT_NE(ds, nullptr);

  // Create a Filter operation on ds
  ds = ds->Filter(Predicate1, {"label"});
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate over the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  std::vector<uint64_t> label_list;
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto label = row["label"];

    std::shared_ptr<Tensor> de_label;
    uint64_t label_value;
    ASSERT_OK(Tensor::CreateFromMSTensor(label, &de_label));
    ASSERT_OK(de_label->GetItemAt(&label_value, {0}));
    label_list.push_back(label_value);

    ASSERT_OK(iter->GetNextRow(&row));
  }

  // Only 1 row whose label is equal to 3
  // Fatal assertion: label_list.at(0) below would throw if no row had passed the filter.
  ASSERT_EQ(i, 1);
  EXPECT_EQ(label_list.at(0), 3);

  // Manually terminate the pipeline
  iter->Stop();
}
656
// Filter with Predicate2 and no input_columns argument on a TFRecord dataset: two rows, with
// labels 2 and 3 in that order, are expected to pass the filter.
TEST_F(MindDataTestPipeline, TestFilterSuccess2) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFilterSuccess2.";
  // Test filter api without input_columns

  // Create a TFRecord Dataset
  std::string data_file = datasets_root_path_ + "/test_tf_file_3_images/train-0000-of-0001.data";
  std::string schema_file = datasets_root_path_ + "/test_tf_file_3_images/datasetSchema.json";
  std::shared_ptr<Dataset> ds = TFRecord({data_file}, schema_file, {"image", "label"}, 0, ShuffleMode::kFalse);
  // Fatal assertion: ds is dereferenced right after this check.
  ASSERT_NE(ds, nullptr);

  // Create a Filter operation on ds
  ds = ds->Filter(Predicate2);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate over the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  std::vector<uint64_t> label_list;
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto label = row["label"];

    std::shared_ptr<Tensor> de_label;
    uint64_t label_value;
    ASSERT_OK(Tensor::CreateFromMSTensor(label, &de_label));
    ASSERT_OK(de_label->GetItemAt(&label_value, {0}));
    label_list.push_back(label_value);

    ASSERT_OK(iter->GetNextRow(&row));
  }

  // There are 2 rows whose label is more than 1
  // Fatal assertion: the label_list.at() calls below would throw with fewer than 2 entries.
  ASSERT_EQ(i, 2);
  EXPECT_EQ(label_list.at(0), 2);
  EXPECT_EQ(label_list.at(1), 3);

  // Manually terminate the pipeline
  iter->Stop();
}
703
// Filter with an empty (nullptr) std::function as predicate: creating the iterator is expected to
// fail and return nullptr.
TEST_F(MindDataTestPipeline, TestFilterFail1) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFilterFail1.";
  // Test filter api with nullptr predicate

  // Create a TFRecord Dataset
  std::string data_file = datasets_root_path_ + "/test_tf_file_3_images/train-0000-of-0001.data";
  std::string schema_file = datasets_root_path_ + "/test_tf_file_3_images/datasetSchema.json";
  std::shared_ptr<Dataset> ds = TFRecord({data_file}, schema_file, {"image", "label"}, 0, ShuffleMode::kFalse);
  // Fatal assertion: ds is dereferenced further down.
  ASSERT_NE(ds, nullptr);

  std::function<MSTensorVec(MSTensorVec)> predicate_null = nullptr;

  // Create a Filter operation on ds
  ds = ds->Filter(predicate_null);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Expect failure: invalid Filter input with nullptr predicate
  EXPECT_EQ(iter, nullptr);
}
725
// Filter on a column name that is not in the dataset ("not_exist"): the iterator is created, but
// GetNextRow is expected to return an error and no row is counted.
TEST_F(MindDataTestPipeline, TestFilterFail2) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFilterFail2.";
  // Test filter api with wrong input_columns

  // Create a TFRecord Dataset
  std::string data_file = datasets_root_path_ + "/test_tf_file_3_images/train-0000-of-0001.data";
  std::string schema_file = datasets_root_path_ + "/test_tf_file_3_images/datasetSchema.json";
  std::shared_ptr<Dataset> ds = TFRecord({data_file}, schema_file, {"image", "label"}, 0, ShuffleMode::kFalse);
  // Fatal assertion: ds is dereferenced right after this check.
  ASSERT_NE(ds, nullptr);

  // Create a Filter operation on ds
  ds = ds->Filter(Predicate1, {"not_exist"});
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate over the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  EXPECT_ERROR(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    EXPECT_ERROR(iter->GetNextRow(&row));
  }

  // Expect failure: column check fail and return nothing
  EXPECT_EQ(i, 0);

  // Manually terminate the pipeline
  iter->Stop();
}
761
// Filter with an empty string as the only input column name: creating the iterator is expected to
// fail and return nullptr.
TEST_F(MindDataTestPipeline, TestFilterFail3) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFilterFail3.";
  // Test filter api with empty input_columns

  // Create a TFRecord Dataset
  std::string data_file = datasets_root_path_ + "/test_tf_file_3_images/train-0000-of-0001.data";
  std::string schema_file = datasets_root_path_ + "/test_tf_file_3_images/datasetSchema.json";
  std::shared_ptr<Dataset> ds = TFRecord({data_file}, schema_file, {"image", "label"}, 0, ShuffleMode::kFalse);
  // Fatal assertion: ds is dereferenced right after this check.
  ASSERT_NE(ds, nullptr);

  // Create a Filter operation on ds
  ds = ds->Filter(Predicate1, {""});
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Expect failure: invalid Filter input with empty string of column name
  EXPECT_EQ(iter, nullptr);
}
781
// ImageFolder with a 10-sample RandomSampler, repeated twice and batched by 2: the iterator is
// expected to produce 10 rows.
TEST_F(MindDataTestPipeline, TestImageFolderBatchAndRepeat) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestImageFolderBatchAndRepeat.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal assertion: ds is dereferenced right after this check.
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 2;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate over the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 10);

  // Manually terminate the pipeline
  iter->Stop();
}
822
// Checks GetDatasetSize() on an ImageFolder -> Repeat(2) -> Batch(2) pipeline.
// The expected size is 10, without creating an iterator.
TEST_F(MindDataTestPipeline, TestPipelineGetDatasetSize) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestPipelineGetDatasetSize.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal check: ds is dereferenced below, so a null dataset must stop the test instead of crashing it
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 2;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  EXPECT_EQ(ds->GetDatasetSize(), 10);
}
843
// Checks that GetDatasetSize() and the number of iterated rows agree for a DistributedSampler
// whose per-shard row count exceeds the requested num_samples.
TEST_F(MindDataTestPipeline, TestDistributedGetDatasetSize1) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedGetDatasetSize1.";
  // Test get dataset size in distributed scenario when num_per_shard is more than num_samples

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<DistributedSampler>(4, 0, false, 10));
  // Fatal check: ds is dereferenced below, so a null dataset must stop the test instead of crashing it
  ASSERT_NE(ds, nullptr);

  // num_per_shard is equal to 44/4 = 11 which is more than num_samples = 10, so the output is 10
  EXPECT_EQ(ds->GetDatasetSize(), 10);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate over the dataset and get each row; an empty row marks the end of the data
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    ASSERT_OK(iter->GetNextRow(&row));
  }

  // The value of i should be equal to the result of get dataset size
  EXPECT_EQ(i, 10);

  // Manually terminate the pipeline, as the other iterating tests in this file do
  iter->Stop();
}
874
// Checks that GetDatasetSize() and the number of iterated rows agree for a DistributedSampler
// whose per-shard row count is smaller than the requested num_samples.
TEST_F(MindDataTestPipeline, TestDistributedGetDatasetSize2) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedGetDatasetSize2.";
  // Test get dataset size in distributed scenario when num_per_shard is less than num_samples

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<DistributedSampler>(4, 0, false, 15));
  // Fatal check: ds is dereferenced below, so a null dataset must stop the test instead of crashing it
  ASSERT_NE(ds, nullptr);

  // num_per_shard is equal to 44/4 = 11 which is less than num_samples = 15, so the output is 11
  EXPECT_EQ(ds->GetDatasetSize(), 11);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate over the dataset and get each row; an empty row marks the end of the data
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    ASSERT_OK(iter->GetNextRow(&row));
  }

  // The value of i should be equal to the result of get dataset size
  EXPECT_EQ(i, 11);

  // Manually terminate the pipeline, as the other iterating tests in this file do
  iter->Stop();
}
905
// Checks a pipeline that chains Repeat, Map (with project columns), Project and Batch.
// 10 sampled rows repeated twice with batch size 1 are expected to yield 20 rows.
TEST_F(MindDataTestPipeline, TestProjectMap) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestProjectMap.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal check: ds is dereferenced below, so a null dataset must stop the test instead of crashing it
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create objects for the tensor ops
  std::shared_ptr<TensorTransform> random_vertical_flip_op = std::make_shared<vision::RandomVerticalFlip>(0.5);
  EXPECT_NE(random_vertical_flip_op, nullptr);

  // Create a Map operation on ds
  ds = ds->Map({random_vertical_flip_op}, {}, {}, {"image", "label"});
  ASSERT_NE(ds, nullptr);

  // Create a Project operation on ds
  std::vector<std::string> column_project = {"image"};
  ds = ds->Project(column_project);
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate over the dataset and get each row; an empty row marks the end of the data
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 20);

  // Manually terminate the pipeline
  iter->Stop();
}
959
// Negative test: a Project operation given the same column name twice must make
// iterator creation fail (CreateIterator() returns nullptr).
TEST_F(MindDataTestPipeline, TestProjectDuplicateColumnFail) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestProjectDuplicateColumnFail.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 3));
  // Fatal check: ds is dereferenced below, so a null dataset must stop the test instead of crashing it
  ASSERT_NE(ds, nullptr);

  // Create objects for the tensor ops
  std::shared_ptr<TensorTransform> random_vertical_flip_op = std::make_shared<vision::RandomVerticalFlip>(0.5);
  EXPECT_NE(random_vertical_flip_op, nullptr);

  // Create a Map operation on ds
  ds = ds->Map({random_vertical_flip_op}, {}, {}, {"image", "label"});
  ASSERT_NE(ds, nullptr);

  // Column list with a duplicated name, which is the invalid input under test
  std::vector<std::string> column_project = {"image", "image"};

  // Create a Project operation on ds
  ds = ds->Project(column_project);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Expect failure: duplicate project op column name
  EXPECT_EQ(iter, nullptr);
}
988
// Negative test: a Map operation with duplicated names in its input, output or project
// column list must make iterator creation fail (CreateIterator() returns nullptr).
TEST_F(MindDataTestPipeline, TestMapDuplicateColumnFail) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMapDuplicateColumnFail.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal check: ds is dereferenced below, so a null dataset must stop the test instead of crashing it
  ASSERT_NE(ds, nullptr);

  // Create objects for the tensor ops
  std::shared_ptr<TensorTransform> random_vertical_flip_op = std::make_shared<vision::RandomVerticalFlip>(0.5);
  EXPECT_NE(random_vertical_flip_op, nullptr);

  // Create a Map operation on ds
  auto ds1 = ds->Map({random_vertical_flip_op}, {"image", "image"}, {}, {});
  ASSERT_NE(ds1, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
  // Expect failure: duplicate Map op input column name
  EXPECT_EQ(iter1, nullptr);

  // Create a Map operation on ds
  auto ds2 = ds->Map({random_vertical_flip_op}, {}, {"label", "label"}, {});
  ASSERT_NE(ds2, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
  // Expect failure: duplicate Map op output column name
  EXPECT_EQ(iter2, nullptr);

  // Create a Map operation on ds
  auto ds3 = ds->Map({random_vertical_flip_op}, {}, {}, {"image", "image"});
  ASSERT_NE(ds3, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter3 = ds3->CreateIterator();
  // Expect failure: duplicate Map op project column name
  EXPECT_EQ(iter3, nullptr);
}
1028
// Negative test: a Map operation given a null TensorTransform must make
// iterator creation fail (CreateIterator() returns nullptr).
TEST_F(MindDataTestPipeline, TestMapNullOperation) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMapNullOperation.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal check: ds is dereferenced below, so a null dataset must stop the test instead of crashing it
  ASSERT_NE(ds, nullptr);

  // Create a Map operation on ds
  std::shared_ptr<TensorTransform> operation = nullptr;
  auto ds1 = ds->Map({operation}, {"image"}, {}, {});
  ASSERT_NE(ds1, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
  // Expect failure: Operation is nullptr
  EXPECT_EQ(iter1, nullptr);
}
1047
// Checks that passing project columns to Map drops the columns that are not listed:
// after Map(..., {"image"}) each row has "image" but no "label", and images are resized to 30.
TEST_F(MindDataTestPipeline, TestProjectMapAutoInjection) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline.TestProjectMapAutoInjection";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal check: ds is dereferenced below, so a null dataset must stop the test instead of crashing it
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create objects for the tensor ops
  std::shared_ptr<TensorTransform> resize_op(new vision::Resize({30, 30}));
  EXPECT_NE(resize_op, nullptr);

  // Create a Map operation on ds
  // {"image"} is the project columns. This will trigger auto injection of ProjectOp after MapOp.
  ds = ds->Map({resize_op}, {}, {}, {"image"});
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate over the dataset and get each row; an empty row marks the end of the data
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // 'label' is dropped during the project op
  EXPECT_EQ(row.find("label"), row.end());
  // 'image' column should still exist
  EXPECT_NE(row.find("image"), row.end());

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    // First dimension of the resized image must match the requested size
    EXPECT_EQ(image.Shape()[0], 30);
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 20);

  // Manually terminate the pipeline
  iter->Stop();
}
1098
// Negative test: Rename with input and output column lists of different sizes must make
// iterator creation fail (CreateIterator() returns nullptr).
TEST_F(MindDataTestPipeline, TestRenameFail1) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRenameFail1.";
  // We expect this test to fail because input and output in Rename are not the same size

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal check: ds is dereferenced below, so a null dataset must stop the test instead of crashing it
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create a Rename operation on ds
  ds = ds->Rename({"image", "label"}, {"col2"});
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Expect failure: invalid Op input
  EXPECT_EQ(iter, nullptr);
}
1122
// Negative test: Rename with an empty output column name must make
// iterator creation fail (CreateIterator() returns nullptr).
TEST_F(MindDataTestPipeline, TestRenameFail2) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRenameFail2.";
  // We expect this test to fail because input or output column name is empty

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal check: ds is dereferenced below, so a null dataset must stop the test instead of crashing it
  ASSERT_NE(ds, nullptr);

  // Create a Rename operation on ds
  ds = ds->Rename({"image", "label"}, {"col2", ""});
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Expect failure: invalid Op input
  EXPECT_EQ(iter, nullptr);
}
1141
// Negative test: Rename with a duplicated name in either the input or the output column
// list must make iterator creation fail (CreateIterator() returns nullptr).
TEST_F(MindDataTestPipeline, TestRenameFail3) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRenameFail3.";
  // We expect this test to fail because of a duplicate column name

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal check: ds is dereferenced below, so a null dataset must stop the test instead of crashing it
  ASSERT_NE(ds, nullptr);

  // Create a Rename operation on ds with duplicated input column names
  auto ds1 = ds->Rename({"image", "image"}, {"col1", "col2"});
  ASSERT_NE(ds1, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
  // Expect failure: invalid Op input
  EXPECT_EQ(iter1, nullptr);

  // Create a Rename operation on ds with duplicated output column names
  auto ds2 = ds->Rename({"image", "label"}, {"col1", "col1"});
  ASSERT_NE(ds2, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
  // Expect failure: invalid Op input
  EXPECT_EQ(iter2, nullptr);
}
1169
// Checks that Rename replaces the "image"/"label" columns with "col1"/"col2" and that the
// Repeat(2) -> Rename -> Batch(1) pipeline over 10 sampled rows yields 20 rows.
TEST_F(MindDataTestPipeline, TestRenameSuccess) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRenameSuccess.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal check: ds is dereferenced below, so a null dataset must stop the test instead of crashing it
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create a Rename operation on ds
  ds = ds->Rename({"image", "label"}, {"col1", "col2"});
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate over the dataset and get each row; an empty row marks the end of the data
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // The new names must be present and the old names must be gone
  EXPECT_NE(row.find("col1"), row.end());
  EXPECT_NE(row.find("col2"), row.end());
  EXPECT_EQ(row.find("image"), row.end());
  EXPECT_EQ(row.find("label"), row.end());

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["col1"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 20);

  // Manually terminate the pipeline
  iter->Stop();
}
1219
// Checks Repeat() with its default count, which is expected to repeat without end.
// The test reads 100 rows (more than one pass over 10 sampled rows) and then stops manually.
TEST_F(MindDataTestPipeline, TestRepeatDefault) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRepeatDefault.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal check: ds is dereferenced below, so a null dataset must stop the test instead of crashing it
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  // Default value of repeat count is -1, expected to repeat infinitely
  ds = ds->Repeat();
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate over the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));
  uint64_t i = 0;
  while (row.size() != 0) {
    // Manually stop: the pipeline itself is not expected to run out of rows
    if (i == 100) {
      break;
    }
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 100);
  // Manually terminate the pipeline
  iter->Stop();
}
1262
// Checks Repeat(1): 10 sampled rows with batch size 1 are expected to yield exactly 10 rows.
TEST_F(MindDataTestPipeline, TestRepeatOne) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRepeatOne.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal check: ds is dereferenced below, so a null dataset must stop the test instead of crashing it
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 1;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate over the dataset and get each row; an empty row marks the end of the data
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 10);
  // Manually terminate the pipeline
  iter->Stop();
}
1301
// Negative test: Repeat with a count of 0 must make iterator creation fail
// (CreateIterator() returns nullptr).
TEST_F(MindDataTestPipeline, TestRepeatFail1) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRepeatFail1.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal check: ds is dereferenced below, so a null dataset must stop the test instead of crashing it
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds with the invalid count under test
  int32_t repeat_num = 0;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Expect failure: invalid Op input
  EXPECT_EQ(iter, nullptr);
}
1320
// Negative test: Repeat with a count of -2 must make iterator creation fail
// (CreateIterator() returns nullptr).
TEST_F(MindDataTestPipeline, TestRepeatFail2) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRepeatFail2.";
  // This case is expected to fail because the repeat count is invalid (less than -1).

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal check: ds is dereferenced below, so a null dataset must stop the test instead of crashing it
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds with the invalid count under test
  int32_t repeat_num = -2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Expect failure: invalid Op input
  EXPECT_EQ(iter, nullptr);
}
1340
// Checks a Shuffle -> Repeat(2) -> Batch(2) pipeline over 10 sampled rows.
// The expected output is 10 batches.
TEST_F(MindDataTestPipeline, TestShuffleDataset) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestShuffleDataset.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal check: ds is dereferenced below, so a null dataset must stop the test instead of crashing it
  ASSERT_NE(ds, nullptr);

  // Create a Shuffle operation on ds
  int32_t shuffle_size = 10;
  ds = ds->Shuffle(shuffle_size);
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 2;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate over the dataset and get each row; an empty row marks the end of the data
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 10);

  // Manually terminate the pipeline
  iter->Stop();
}
1386
// Checks Skip(3) on 10 sampled rows: 7 rows are expected to remain.
TEST_F(MindDataTestPipeline, TestSkipDataset) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSkipDataset.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal check: ds is dereferenced below, so a null dataset must stop the test instead of crashing it
  ASSERT_NE(ds, nullptr);

  // Create a Skip operation on ds
  int32_t count = 3;
  ds = ds->Skip(count);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate over the dataset and get each row; an empty row marks the end of the data
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }
  MS_LOG(INFO) << "Number of rows: " << i;

  // Expect 10-3=7 rows
  EXPECT_EQ(i, 7);

  // Manually terminate the pipeline
  iter->Stop();
}
1424
// Checks that Skip(0), Project, Take(-1) and Repeat(1) chained together leave the row count
// unchanged: 6 sampled rows are expected to come out of the pipeline.
TEST_F(MindDataTestPipeline, TestSkipTakeRepeat) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSkipTakeRepeat.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 6));
  // Fatal check: ds is dereferenced below, so a null dataset must stop the test instead of crashing it
  ASSERT_NE(ds, nullptr);

  // Create a Skip operation on ds
  int32_t count = 0;
  ds = ds->Skip(count);
  ASSERT_NE(ds, nullptr);

  // Create a Project operation on ds
  std::vector<std::string> column_project = {"image"};
  ds = ds->Project(column_project);
  ASSERT_NE(ds, nullptr);

  // Add a Take(-1)
  ds = ds->Take(-1);
  ASSERT_NE(ds, nullptr);

  // Add a Repeat(1)
  ds = ds->Repeat(1);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate over the dataset and get each row; an empty row marks the end of the data
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }
  MS_LOG(INFO) << "Number of rows: " << i;

  // Expect 6 rows
  EXPECT_EQ(i, 6);

  // Manually terminate the pipeline
  iter->Stop();
}
1469
// Checks GetDatasetSize() after Skip(3) on 10 sampled rows: the expected size is 7.
TEST_F(MindDataTestPipeline, TestSkipGetDatasetSize) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSkipGetDatasetSize.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal check: ds is dereferenced below, so a null dataset must stop the test instead of crashing it
  ASSERT_NE(ds, nullptr);

  // Create a Skip operation on ds
  int32_t count = 3;
  ds = ds->Skip(count);
  ASSERT_NE(ds, nullptr);

  EXPECT_EQ(ds->GetDatasetSize(), 7);
}
1485
// Negative test: Skip with a count of -1 must make iterator creation fail
// (CreateIterator() returns nullptr).
TEST_F(MindDataTestPipeline, TestSkipDatasetError1) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSkipDatasetError1.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal check: ds is dereferenced below, so a null dataset must stop the test instead of crashing it
  ASSERT_NE(ds, nullptr);

  // Create a Skip operation on ds with invalid count input
  int32_t count = -1;
  ds = ds->Skip(count);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Expect failure: invalid Skip input
  EXPECT_EQ(iter, nullptr);
}
1504
// Checks Take() with its default count: all 7 sampled rows are expected to pass through.
TEST_F(MindDataTestPipeline, TestTakeDatasetDefault) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTakeDatasetDefault.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 7));
  // Fatal check: ds is dereferenced below, so a null dataset must stop the test instead of crashing it
  ASSERT_NE(ds, nullptr);

  // Create a Take operation on ds, default count = -1
  ds = ds->Take();
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate over the dataset and get each row; an empty row marks the end of the data
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }
  MS_LOG(INFO) << "Number of rows: " << i;

  // Expect 7 rows
  EXPECT_EQ(i, 7);

  // Manually terminate the pipeline
  iter->Stop();
}
1541
// Checks GetDatasetSize() after Take(2) on 7 sampled rows: the expected size is 2.
TEST_F(MindDataTestPipeline, TestTakeGetDatasetSize) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTakeGetDatasetSize.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 7));
  // Fatal check: ds is dereferenced below, so a null dataset must stop the test instead of crashing it
  ASSERT_NE(ds, nullptr);

  // Create a Take operation on ds with count = 2
  ds = ds->Take(2);
  ASSERT_NE(ds, nullptr);

  EXPECT_EQ(ds->GetDatasetSize(), 2);
}
1556
// Negative test: Take with a count of -5 or 0 must make iterator creation fail
// (CreateIterator() returns nullptr).
TEST_F(MindDataTestPipeline, TestTakeDatasetError1) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTakeDatasetError1.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal check: ds is dereferenced below, so a null dataset must stop the test instead of crashing it
  ASSERT_NE(ds, nullptr);

  // Create a Take operation on ds with invalid count input
  int32_t count = -5;
  auto ds1 = ds->Take(count);
  ASSERT_NE(ds1, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds1->CreateIterator();
  // Expect failure: invalid Op input
  EXPECT_EQ(iter, nullptr);

  // Create a Take operation on ds with invalid count input
  count = 0;
  auto ds2 = ds->Take(count);
  ASSERT_NE(ds2, nullptr);

  // Create an iterator over the result of the above dataset
  iter = ds2->CreateIterator();
  // Expect failure: invalid Op input
  EXPECT_EQ(iter, nullptr);
}
1585
// Checks Take(5) on 8 sampled rows: exactly 5 rows are expected to come out.
TEST_F(MindDataTestPipeline, TestTakeDatasetNormal) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTakeDatasetNormal.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 8));
  // Fatal check: ds is dereferenced below, so a null dataset must stop the test instead of crashing it
  ASSERT_NE(ds, nullptr);

  // Create a Take operation on ds
  ds = ds->Take(5);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate over the dataset and get each row; an empty row marks the end of the data
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }
  MS_LOG(INFO) << "Number of rows: " << i;

  // Expect 5 rows
  EXPECT_EQ(i, 5);

  // Manually terminate the pipeline
  iter->Stop();
}
1622
// Checks a Mnist pipeline that applies Resize and CenterCrop through a single Map operation.
// 20 sampled rows repeated twice with batch size 1 are expected to yield 40 rows.
TEST_F(MindDataTestPipeline, TestTensorOpsAndMap) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTensorOpsAndMap.";

  // Create a Mnist Dataset
  std::string folder_path = datasets_root_path_ + "/testMnistData/";
  std::shared_ptr<Dataset> ds = Mnist(folder_path, "all", std::make_shared<RandomSampler>(false, 20));
  // Fatal check: ds is dereferenced below, so a null dataset must stop the test instead of crashing it
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create objects for the tensor ops
  std::shared_ptr<TensorTransform> resize_op(new vision::Resize({30, 30}));
  EXPECT_NE(resize_op, nullptr);

  std::shared_ptr<TensorTransform> center_crop_op(new vision::CenterCrop({16, 16}));
  EXPECT_NE(center_crop_op, nullptr);

  // Create a Map operation on ds
  ds = ds->Map({resize_op, center_crop_op});
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate over the dataset and get each row; an empty row marks the end of the data
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 40);

  // Manually terminate the pipeline
  iter->Stop();
}
1674
// Negative test for the free function Zip(): zipping two ImageFolder datasets whose column names collide.
// The pipeline is built without error checks failing, but CreateIterator() is expected to return nullptr.
TEST_F(MindDataTestPipeline, TestZipFail) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestZipFail.";
  // We expect this pipeline to fail because both datasets we are zipping have "image" and "label" columns,
  // and zip doesn't accept datasets with the same column names

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal assertion: a null input would make the rest of the test meaningless
  ASSERT_NE(ds, nullptr);

  // Create a second ImageFolder Dataset over the same folder
  std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  ASSERT_NE(ds1, nullptr);

  // Create a Zip operation on the datasets
  ds = Zip({ds, ds1});
  // ds is dereferenced by the Batch call below, so it must not be null
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Expect failure: the zipped datasets share column names
  EXPECT_EQ(iter, nullptr);
}
1703
// Negative test for the free function Zip(): zipping an empty list of datasets.
// Zip({}) is expected to return a non-null dataset object whose CreateIterator() returns nullptr.
TEST_F(MindDataTestPipeline, TestZipFail2) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestZipFail2.";
  // This case is expected to fail because the input dataset is empty.

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  EXPECT_NE(ds, nullptr);

  // Create a Zip operation on the datasets
  // Input dataset to zip is empty; the result replaces the ImageFolder dataset held by ds
  ds = Zip({});
  // ds is dereferenced on the next statement; fail the test cleanly instead of crashing on a null pointer
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Expect failure: invalid Op input
  EXPECT_EQ(iter, nullptr);
}
1723
// Positive test for the member function Dataset::Zip().
// Zips three datasets with distinct column names:
//   ds  - ImageFolder projected to "image"
//   ds1 - ImageFolder with "image"/"label" renamed to "col1"/"col2"
//   ds2 - Cifar10 projected to "label"
// then batches with size 1, checks the four resulting column names and expects 10 rows.
TEST_F(MindDataTestPipeline, TestZipSuccess) {
  // Testing the member zip() function
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestZipSuccess.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Dataset pointers are dereferenced below; use fatal assertions so a null result cannot crash the test
  ASSERT_NE(ds, nullptr);

  // Create a Project operation on ds
  std::vector<std::string> column_project = {"image"};
  ds = ds->Project(column_project);
  ASSERT_NE(ds, nullptr);

  // Create an ImageFolder Dataset
  std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  ASSERT_NE(ds1, nullptr);

  // Create a Rename operation on ds1 (so that the 3 datasets we are going to zip have distinct column names)
  ds1 = ds1->Rename({"image", "label"}, {"col1", "col2"});
  ASSERT_NE(ds1, nullptr);

  // Create a Cifar10 Dataset
  folder_path = datasets_root_path_ + "/testCifar10Data/";
  std::shared_ptr<Dataset> ds2 = Cifar10(folder_path, "all", std::make_shared<RandomSampler>(false, 10));
  ASSERT_NE(ds2, nullptr);

  // Create a Project operation on ds2
  column_project = {"label"};
  ds2 = ds2->Project(column_project);
  ASSERT_NE(ds2, nullptr);

  // Create a Zip operation on the datasets
  ds = ds->Zip({ds1, ds2});
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // The iterator is used right away, so a null iterator must stop the test here
  ASSERT_NE(iter, nullptr);

  // iterate over the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // Check zipped column names
  EXPECT_EQ(row.size(), 4);
  EXPECT_NE(row.find("image"), row.end());
  EXPECT_NE(row.find("label"), row.end());
  EXPECT_NE(row.find("col1"), row.end());
  EXPECT_NE(row.find("col2"), row.end());

  // An empty row is the loop's end-of-data condition
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 10);

  // Manually terminate the pipeline
  iter->Stop();
}
1793
// Checks GetDatasetSize() on the result of the member function Dataset::Zip().
// The three zipped inputs are built with sampler counts of 2, 3 and 5; the expected zipped size is 2,
// which is the smallest of those counts.
TEST_F(MindDataTestPipeline, TestZipGetDatasetSize) {
  // Testing the member zip() function
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestZipGetDatasetSize.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 2));
  // Dataset pointers are dereferenced below; use fatal assertions so a null result cannot crash the test
  ASSERT_NE(ds, nullptr);

  // Create a Project operation on ds
  std::vector<std::string> column_project = {"image"};
  ds = ds->Project(column_project);
  ASSERT_NE(ds, nullptr);

  // Create an ImageFolder Dataset
  std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 3));
  ASSERT_NE(ds1, nullptr);

  // Create a Rename operation on ds1 (so that the 3 datasets we are going to zip have distinct column names)
  ds1 = ds1->Rename({"image", "label"}, {"col1", "col2"});
  ASSERT_NE(ds1, nullptr);

  // Create a Cifar10 Dataset
  folder_path = datasets_root_path_ + "/testCifar10Data/";
  std::shared_ptr<Dataset> ds2 = Cifar10(folder_path, "all", std::make_shared<RandomSampler>(false, 5));
  ASSERT_NE(ds2, nullptr);

  // Create a Project operation on ds2
  column_project = {"label"};
  ds2 = ds2->Project(column_project);
  ASSERT_NE(ds2, nullptr);

  // Create a Zip operation on the datasets
  ds = ds->Zip({ds1, ds2});
  // GetDatasetSize() is called on ds next, so it must not be null
  ASSERT_NE(ds, nullptr);

  EXPECT_EQ(ds->GetDatasetSize(), 2);
}
1831
// Positive test for the free function Zip().
// Zips two ImageFolder datasets (sampler counts 9 and 10) after renaming the columns of the first one,
// batches with size 1, checks the four resulting column names and expects 9 rows
// (the smaller of the two sampler counts).
TEST_F(MindDataTestPipeline, TestZipSuccess2) {
  // Testing the static zip() function
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestZipSuccess2.";

  // Create two ImageFolder Datasets over the same folder
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 9));
  // Dataset pointers are dereferenced below; use fatal assertions so a null result cannot crash the test
  ASSERT_NE(ds, nullptr);
  std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  ASSERT_NE(ds2, nullptr);

  // Create a Rename operation on ds (so that the 2 datasets we are going to zip have distinct column names)
  ds = ds->Rename({"image", "label"}, {"col1", "col2"});
  ASSERT_NE(ds, nullptr);

  // Create a Zip operation on the datasets
  ds = Zip({ds, ds2});
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // The iterator is used right away, so a null iterator must stop the test here
  ASSERT_NE(iter, nullptr);

  // iterate over the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // Check zipped column names
  EXPECT_EQ(row.size(), 4);
  EXPECT_NE(row.find("image"), row.end());
  EXPECT_NE(row.find("label"), row.end());
  EXPECT_NE(row.find("col1"), row.end());
  EXPECT_NE(row.find("col2"), row.end());

  // An empty row is the loop's end-of-data condition
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 9);

  // Manually terminate the pipeline
  iter->Stop();
}
1885
// Exercises num_workers validation on an ImageFolder dataset:
//   - -1, INT32_MAX and (when the CPU count is available) cpu_core_cnt + 1 must make CreateIterator() return nullptr
//   - cpu_core_cnt and 1 must pass IRNode()->ValidateParams()
TEST_F(MindDataTestPipeline, TestNumWorkersValidate) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestNumWorkersValidate.";

  // Build the ImageFolder dataset under test
  const std::string image_dir = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(image_dir, false, std::make_shared<SequentialSampler>(0, 1));

  // Everything below dereferences ds; stop here rather than core dump if creation failed
  ASSERT_NE(ds, nullptr);

  // Applies the given worker count to ds and tries to create an iterator with it
  const auto iterator_with_workers = [&ds](int32_t num_workers) {
    return ds->SetNumWorkers(num_workers)->CreateIterator();
  };

  // num_workers = -1 must be rejected
  EXPECT_EQ(iterator_with_workers(-1), nullptr);

  // a very large num_workers must be rejected as well
  EXPECT_EQ(iterator_with_workers(INT32_MAX), nullptr);

  const int32_t cpu_core_cnt = GlobalContext::config_manager()->num_cpu_threads();

  // the remaining checks only make sense if cpu_core_cnt was successfully obtained
  if (cpu_core_cnt <= 0) {
    return;
  }

  // one worker more than the reported CPU thread count must be rejected
  EXPECT_EQ(iterator_with_workers(cpu_core_cnt + 1), nullptr);
  // setting num_workers to cpu_core_cnt or to 1 is allowed
  ASSERT_OK(ds->SetNumWorkers(cpu_core_cnt)->IRNode()->ValidateParams());
  ASSERT_OK(ds->SetNumWorkers(1)->IRNode()->ValidateParams());
}
1912