• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020-2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "common/common.h"
17 #include "include/api/types.h"
18 #include "minddata/dataset/core/tensor_row.h"
19 #include "minddata/dataset/engine/ir/datasetops/dataset_node.h"
20 #include "minddata/dataset/include/dataset/datasets.h"
21 #include "minddata/dataset/include/dataset/vision.h"
22 
23 using namespace mindspore::dataset;
24 using mindspore::dataset::Tensor;
25 
26 class MindDataTestPipeline : public UT::DatasetOpTesting {
27  protected:
28 };
29 
VecToRow(const MSTensorVec & v)30 TensorRow VecToRow(const MSTensorVec &v) {
31   TensorRow row;
32   for (const mindspore::MSTensor &t : v) {
33     std::shared_ptr<Tensor> rt;
34     (void)Tensor::CreateFromMemory(TensorShape(t.Shape()), MSTypeToDEType(static_cast<mindspore::TypeId>(t.DataType())),
35                                    (const uchar *)(t.Data().get()), t.DataSize(), &rt);
36     row.emplace_back(rt);
37   }
38   return row;
39 }
RowToVec(const TensorRow & v)40 MSTensorVec RowToVec(const TensorRow &v) {
41   MSTensorVec rv;  // std::make_shared<DETensor>(de_tensor)
42   std::transform(v.begin(), v.end(), std::back_inserter(rv), [](std::shared_ptr<Tensor> t) -> mindspore::MSTensor {
43     return mindspore::MSTensor(std::make_shared<DETensor>(t));
44   });
45   return rv;
46 }
47 
BucketBatchTestFunction(MSTensorVec input)48 MSTensorVec BucketBatchTestFunction(MSTensorVec input) {
49   mindspore::dataset::TensorRow output;
50   std::shared_ptr<Tensor> out;
51   (void)Tensor::CreateEmpty(mindspore::dataset::TensorShape({1}),
52                             mindspore::dataset::DataType(mindspore::dataset::DataType::Type::DE_INT32), &out);
53   (void)out->SetItemAt({0}, 2);
54   output.push_back(out);
55   return RowToVec(output);
56 }
57 
Predicate1(MSTensorVec in)58 MSTensorVec Predicate1(MSTensorVec in) {
59   // Return true if input is equal to 3
60   uint64_t input_value;
61   TensorRow input = VecToRow(in);
62   (void)input.at(0)->GetItemAt(&input_value, {0});
63   bool result = (input_value == 3);
64 
65   // Convert from boolean to TensorRow
66   TensorRow output;
67   std::shared_ptr<Tensor> out;
68   (void)Tensor::CreateEmpty(mindspore::dataset::TensorShape({}),
69                             mindspore::dataset::DataType(mindspore::dataset::DataType::Type::DE_BOOL), &out);
70   (void)out->SetItemAt({}, result);
71   output.push_back(out);
72 
73   return RowToVec(output);
74 }
75 
Predicate2(MSTensorVec in)76 MSTensorVec Predicate2(MSTensorVec in) {
77   // Return true if label is more than 1
78   // The index of label in input is 1
79   uint64_t input_value;
80   TensorRow input = VecToRow(in);
81   (void)input.at(1)->GetItemAt(&input_value, {0});
82   bool result = (input_value > 1);
83 
84   // Convert from boolean to TensorRow
85   TensorRow output;
86   std::shared_ptr<Tensor> out;
87   (void)Tensor::CreateEmpty(mindspore::dataset::TensorShape({}),
88                             mindspore::dataset::DataType(mindspore::dataset::DataType::Type::DE_BOOL), &out);
89   (void)out->SetItemAt({}, result);
90   output.push_back(out);
91 
92   return RowToVec(output);
93 }
94 
// Builds Mnist -> Repeat(2) -> Batch(2), iterates the pipeline and expects exactly 10 batches.
TEST_F(MindDataTestPipeline, TestBatchAndRepeat) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestBatchAndRepeat.";

  // Source: Mnist read through RandomSampler(false, 10)
  const std::string mnist_dir = datasets_root_path_ + "/testMnistData/";
  std::shared_ptr<Dataset> ds = Mnist(mnist_dir, "all", std::make_shared<RandomSampler>(false, 10));
  EXPECT_NE(ds, nullptr);

  // Repeat the source twice
  const int32_t repeat_count = 2;
  ds = ds->Repeat(repeat_count);
  EXPECT_NE(ds, nullptr);

  // Group rows two at a time
  const int32_t rows_per_batch = 2;
  ds = ds->Batch(rows_per_batch);
  EXPECT_NE(ds, nullptr);

  // Creating the iterator builds and launches the execution tree
  auto iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Pull rows until an empty row signals the end of data
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t batch_count = 0;
  while (!row.empty()) {
    ++batch_count;
    MS_LOG(INFO) << "Tensor image shape: " << row["image"].Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(batch_count, 10);

  // Shut the pipeline down explicitly
  iter->Stop();
}
135 
// BucketBatchByLength called with only its three leading arguments (all others defaulted);
// the pipeline is expected to yield 2 batches.
TEST_F(MindDataTestPipeline, TestBucketBatchByLengthSuccess1) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestBucketBatchByLengthSuccess1.";

  // Source: Mnist read through RandomSampler(false, 10)
  const std::string mnist_dir = datasets_root_path_ + "/testMnistData/";
  std::shared_ptr<Dataset> ds = Mnist(mnist_dir, "all", std::make_shared<RandomSampler>(false, 10));
  EXPECT_NE(ds, nullptr);

  // Three boundaries paired with four bucket batch sizes
  ds = ds->BucketBatchByLength({"image"}, {1, 2, 3}, {4, 5, 6, 7});
  EXPECT_NE(ds, nullptr);

  // Creating the iterator builds and launches the execution tree
  auto iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Pull rows until an empty row signals the end of data
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t batch_count = 0;
  while (!row.empty()) {
    ++batch_count;
    MS_LOG(INFO) << "Tensor image shape: " << row["image"].Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }
  // 2 batches of size 5
  EXPECT_EQ(batch_count, 2);

  // Shut the pipeline down explicitly
  iter->Stop();
}
171 
// BucketBatchByLength called with every argument supplied explicitly, including the
// BucketBatchTestFunction callback and an empty pad_info map; expects 3 batches.
TEST_F(MindDataTestPipeline, TestBucketBatchByLengthSuccess2) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestBucketBatchByLengthSuccess2.";

  // Source: Mnist read through RandomSampler(false, 10)
  const std::string mnist_dir = datasets_root_path_ + "/testMnistData/";
  std::shared_ptr<Dataset> ds = Mnist(mnist_dir, "all", std::make_shared<RandomSampler>(false, 10));
  EXPECT_NE(ds, nullptr);

  // Two boundaries, three bucket batch sizes, custom length callback, no padding information
  std::map<std::string, std::pair<std::vector<int64_t>, mindspore::MSTensor>> empty_pad_info = {};
  ds = ds->BucketBatchByLength({"image"}, {1, 2}, {1, 2, 3}, &BucketBatchTestFunction, empty_pad_info, true, true);
  EXPECT_NE(ds, nullptr);

  // Creating the iterator builds and launches the execution tree
  auto iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Pull rows until an empty row signals the end of data
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t batch_count = 0;
  while (!row.empty()) {
    ++batch_count;
    MS_LOG(INFO) << "Tensor image shape: " << row["image"].Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }
  // With 2 boundaries, 3 buckets are created
  EXPECT_EQ(batch_count, 3);

  // Shut the pipeline down explicitly
  iter->Stop();
}
208 
// Negative case: bucket_boundaries is empty, so iterator creation is expected to return nullptr.
TEST_F(MindDataTestPipeline, TestBucketBatchByLengthFail1) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestBucketBatchByLengthFail1.";

  // Source: Mnist read through RandomSampler(false, 10)
  const std::string mnist_dir = datasets_root_path_ + "/testMnistData/";
  std::shared_ptr<Dataset> ds = Mnist(mnist_dir, "all", std::make_shared<RandomSampler>(false, 10));
  EXPECT_NE(ds, nullptr);

  // The node itself is still created; the bad argument is only reported later
  ds = ds->BucketBatchByLength({"image"}, {}, {1});
  EXPECT_NE(ds, nullptr);

  // Expect failure: invalid Op input
  auto iter = ds->CreateIterator();
  EXPECT_EQ(iter, nullptr);
}
228 
// Negative case: bucket_batch_sizes is empty, so iterator creation is expected to return nullptr.
TEST_F(MindDataTestPipeline, TestBucketBatchByLengthFail2) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestBucketBatchByLengthFail2.";

  // Source: Mnist read through RandomSampler(false, 10)
  const std::string mnist_dir = datasets_root_path_ + "/testMnistData/";
  std::shared_ptr<Dataset> ds = Mnist(mnist_dir, "all", std::make_shared<RandomSampler>(false, 10));
  EXPECT_NE(ds, nullptr);

  // The node itself is still created; the bad argument is only reported later
  ds = ds->BucketBatchByLength({"image"}, {1}, {});
  EXPECT_NE(ds, nullptr);

  // Expect failure: invalid Op input
  auto iter = ds->CreateIterator();
  EXPECT_EQ(iter, nullptr);
}
248 
// Negative case: bucket_boundaries contains a negative value, so iterator creation is expected to
// return nullptr.
TEST_F(MindDataTestPipeline, TestBucketBatchByLengthFail3) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestBucketBatchByLengthFail3.";

  // Source: Mnist read through RandomSampler(false, 10)
  const std::string mnist_dir = datasets_root_path_ + "/testMnistData/";
  std::shared_ptr<Dataset> ds = Mnist(mnist_dir, "all", std::make_shared<RandomSampler>(false, 10));
  EXPECT_NE(ds, nullptr);

  // The node itself is still created; the bad argument is only reported later
  ds = ds->BucketBatchByLength({"image"}, {-1, 1}, {1, 2, 3});
  EXPECT_NE(ds, nullptr);

  // Expect failure: invalid Op input
  auto iter = ds->CreateIterator();
  EXPECT_EQ(iter, nullptr);
}
268 
// Negative case: bucket_boundaries is not strictly increasing ({2, 2}), so iterator creation is
// expected to return nullptr.
TEST_F(MindDataTestPipeline, TestBucketBatchByLengthFail4) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestBucketBatchByLengthFail4.";

  // Source: Mnist read through RandomSampler(false, 10)
  const std::string mnist_dir = datasets_root_path_ + "/testMnistData/";
  std::shared_ptr<Dataset> ds = Mnist(mnist_dir, "all", std::make_shared<RandomSampler>(false, 10));
  EXPECT_NE(ds, nullptr);

  // The node itself is still created; the bad argument is only reported later
  ds = ds->BucketBatchByLength({"image"}, {2, 2}, {1, 2, 3});
  EXPECT_NE(ds, nullptr);

  // Expect failure: invalid Op input
  auto iter = ds->CreateIterator();
  EXPECT_EQ(iter, nullptr);
}
288 
// Negative case: bucket_batch_sizes has the wrong number of entries for the given boundaries
// (two sizes for two boundaries), so iterator creation is expected to return nullptr.
TEST_F(MindDataTestPipeline, TestBucketBatchByLengthFail5) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestBucketBatchByLengthFail5.";

  // Source: Mnist read through RandomSampler(false, 10)
  const std::string mnist_dir = datasets_root_path_ + "/testMnistData/";
  std::shared_ptr<Dataset> ds = Mnist(mnist_dir, "all", std::make_shared<RandomSampler>(false, 10));
  EXPECT_NE(ds, nullptr);

  // The node itself is still created; the bad argument is only reported later
  ds = ds->BucketBatchByLength({"image"}, {1, 2}, {1, 2});
  EXPECT_NE(ds, nullptr);

  // Expect failure: invalid Op input
  auto iter = ds->CreateIterator();
  EXPECT_EQ(iter, nullptr);
}
308 
// Negative case: bucket_batch_sizes contains a negative value, so iterator creation is expected to
// return nullptr.
TEST_F(MindDataTestPipeline, TestBucketBatchByLengthFail6) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestBucketBatchByLengthFail6.";

  // Source: Mnist read through RandomSampler(false, 10)
  const std::string mnist_dir = datasets_root_path_ + "/testMnistData/";
  std::shared_ptr<Dataset> ds = Mnist(mnist_dir, "all", std::make_shared<RandomSampler>(false, 10));
  EXPECT_NE(ds, nullptr);

  // The node itself is still created; the bad argument is only reported later
  ds = ds->BucketBatchByLength({"image"}, {1, 2}, {1, -2, 3});
  EXPECT_NE(ds, nullptr);

  // Expect failure: invalid Op input
  auto iter = ds->CreateIterator();
  EXPECT_EQ(iter, nullptr);
}
327 
// Negative case: two column names are given but no element_length_function, so iterator creation
// is expected to return nullptr.
TEST_F(MindDataTestPipeline, TestBucketBatchByLengthFail7) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestBucketBatchByLengthFail7.";

  // Source: Mnist read through RandomSampler(false, 10)
  const std::string mnist_dir = datasets_root_path_ + "/testMnistData/";
  std::shared_ptr<Dataset> ds = Mnist(mnist_dir, "all", std::make_shared<RandomSampler>(false, 10));
  EXPECT_NE(ds, nullptr);

  // The node itself is still created; the bad argument is only reported later
  ds = ds->BucketBatchByLength({"image", "label"}, {1, 2}, {1, 2, 3});
  EXPECT_NE(ds, nullptr);

  // Expect failure: invalid Op input
  auto iter = ds->CreateIterator();
  EXPECT_EQ(iter, nullptr);
}
347 
// Negative case: concatenating two datasets whose column names differ. The second dataset is
// renamed to {"col1", "col2"}, so iterator creation over the concatenation is expected to fail.
TEST_F(MindDataTestPipeline, TestConcatFail1) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConcatFail1.";
  // This case is expected to fail because the input column names of concatenated datasets are not the same

  // Create two ImageFolder Datasets
  // Column names: {"image", "label"}
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  EXPECT_NE(ds, nullptr);
  std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Check the dataset that was just created (the original re-checked ds here)
  EXPECT_NE(ds2, nullptr);

  // Create a Rename operation on ds2
  ds2 = ds2->Rename({"image", "label"}, {"col1", "col2"});
  // Check the result of the Rename (the original re-checked ds here)
  EXPECT_NE(ds2, nullptr);

  // Create a Concat operation on the ds
  // Column names of the datasets to concat do not match
  ds = ds->Concat({ds2});
  EXPECT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  EXPECT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Expect failure: mismatched column names
  EXPECT_EQ(iter, nullptr);
}
379 
// Negative case: Concat is given an empty list of datasets, so iterator creation is expected to
// return nullptr.
TEST_F(MindDataTestPipeline, TestConcatFail2) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConcatFail2.";

  // Source: ImageFolder read through RandomSampler(false, 10)
  const std::string image_dir = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(image_dir, true, std::make_shared<RandomSampler>(false, 10));
  EXPECT_NE(ds, nullptr);

  // Nothing to concatenate with
  ds = ds->Concat({});
  EXPECT_NE(ds, nullptr);

  // Expect failure: invalid Op input
  auto iter = ds->CreateIterator();
  EXPECT_EQ(iter, nullptr);
}
399 
// Negative case: Concat is given a list containing a null dataset, so iterator creation is
// expected to return nullptr.
TEST_F(MindDataTestPipeline, TestConcatFail3) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConcatFail3.";

  // Source: ImageFolder read through RandomSampler(false, 10)
  const std::string image_dir = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(image_dir, true, std::make_shared<RandomSampler>(false, 10));
  EXPECT_NE(ds, nullptr);

  // The only dataset to concatenate is null
  ds = ds->Concat({nullptr});
  EXPECT_NE(ds, nullptr);

  // Expect failure: invalid Op input
  auto iter = ds->CreateIterator();
  EXPECT_EQ(iter, nullptr);
}
419 
// Negative case: the "+" concat operator is applied with nullptr as the right-hand operand, so
// iterator creation is expected to return nullptr.
TEST_F(MindDataTestPipeline, TestConcatFail4) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConcatFail4.";

  // Source: ImageFolder read through RandomSampler(false, 10)
  const std::string image_dir = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(image_dir, true, std::make_shared<RandomSampler>(false, 10));
  EXPECT_NE(ds, nullptr);

  // Right-hand dataset is null
  ds = ds + nullptr;
  EXPECT_NE(ds, nullptr);

  // Expect failure: invalid Op input
  auto iter = ds->CreateIterator();
  EXPECT_EQ(iter, nullptr);
}
439 
// Negative case: a dataset is concatenated with itself (ds1 + ds1 + ds2), which gives the Project
// node two parents; iterator creation is expected to return nullptr.
TEST_F(MindDataTestPipeline, TestConcatFail5) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConcatFail5.";

  // Two independent ImageFolder sources over the same directory
  const std::string image_dir = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds1 = ImageFolder(image_dir, true, std::make_shared<RandomSampler>(false, 10));
  EXPECT_NE(ds1, nullptr);

  std::shared_ptr<Dataset> ds2 = ImageFolder(image_dir, true, std::make_shared<RandomSampler>(false, 10));
  EXPECT_NE(ds2, nullptr);

  // Keep only the "image" column on both sides
  ds1 = ds1->Project({"image"});
  EXPECT_NE(ds1, nullptr);
  ds2 = ds2->Project({"image"});
  EXPECT_NE(ds2, nullptr);

  // ds1 appears twice in the concatenation
  ds1 = ds1 + ds1 + ds2;
  EXPECT_NE(ds1, nullptr);

  // Expect failure: The data pipeline is not a tree
  auto iter = ds1->CreateIterator();
  EXPECT_EQ(iter, nullptr);
}
468 
// Concatenates an ImageFolder dataset and a Cifar10 dataset (both projected to "image") with
// Dataset::Concat, batches with size 1 and expects 19 rows in total.
TEST_F(MindDataTestPipeline, TestConcatSuccess) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConcatSuccess.";

  // Create an ImageFolder Dataset
  // Column names: {"image", "label"}
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  EXPECT_NE(ds, nullptr);

  // Create a Cifar10 Dataset
  // Column names: {"image", "label"}
  folder_path = datasets_root_path_ + "/testCifar10Data/";
  std::shared_ptr<Dataset> ds2 = Cifar10(folder_path, "all", std::make_shared<RandomSampler>(false, 9));
  EXPECT_NE(ds2, nullptr);

  // Create a Project operation on both datasets
  ds = ds->Project({"image"});
  EXPECT_NE(ds, nullptr);
  ds2 = ds2->Project({"image"});
  // Check the dataset that was just projected (the original re-checked ds here)
  EXPECT_NE(ds2, nullptr);

  // Create a Concat operation on the ds
  ds = ds->Concat({ds2});
  EXPECT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  EXPECT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // iterate over the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  // 10 rows sampled from ImageFolder plus 9 from Cifar10
  EXPECT_EQ(i, 19);
  // Manually terminate the pipeline
  iter->Stop();
}
519 
// Concatenates an ImageFolder dataset and a Cifar10 dataset (both projected to "image") and
// expects GetDatasetSize() on the result to report 19.
TEST_F(MindDataTestPipeline, TestConcatGetDatasetSize) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConcatGetDatasetSize.";

  // Create an ImageFolder Dataset
  // Column names: {"image", "label"}
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  EXPECT_NE(ds, nullptr);

  // Create a Cifar10 Dataset
  // Column names: {"image", "label"}
  folder_path = datasets_root_path_ + "/testCifar10Data/";
  std::shared_ptr<Dataset> ds2 = Cifar10(folder_path, "all", std::make_shared<RandomSampler>(false, 9));
  EXPECT_NE(ds2, nullptr);

  // Create a Project operation on both datasets
  ds = ds->Project({"image"});
  EXPECT_NE(ds, nullptr);
  ds2 = ds2->Project({"image"});
  // Check the dataset that was just projected (the original re-checked ds here)
  EXPECT_NE(ds2, nullptr);

  // Create a Concat operation on the ds
  ds = ds->Concat({ds2});
  EXPECT_NE(ds, nullptr);

  // 10 rows sampled from ImageFolder plus 9 from Cifar10
  EXPECT_EQ(ds->GetDatasetSize(), 19);
}
547 
// Same scenario as the Concat() success case, but the two datasets are joined with the "+"
// operator; batches with size 1 and expects 19 rows in total.
TEST_F(MindDataTestPipeline, TestConcatSuccess2) {
  // Test "+" operator to concat two datasets
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConcatSuccess2.";

  // Create an ImageFolder Dataset
  // Column names: {"image", "label"}
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  EXPECT_NE(ds, nullptr);

  // Create a Cifar10 Dataset
  // Column names: {"image", "label"}
  folder_path = datasets_root_path_ + "/testCifar10Data/";
  std::shared_ptr<Dataset> ds2 = Cifar10(folder_path, "all", std::make_shared<RandomSampler>(false, 9));
  EXPECT_NE(ds2, nullptr);

  // Create a Project operation on both datasets
  ds = ds->Project({"image"});
  EXPECT_NE(ds, nullptr);
  ds2 = ds2->Project({"image"});
  // Check the dataset that was just projected (the original re-checked ds here)
  EXPECT_NE(ds2, nullptr);

  // Create a Concat operation on the ds
  ds = ds + ds2;
  EXPECT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  EXPECT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // iterate over the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  // 10 rows sampled from ImageFolder plus 9 from Cifar10
  EXPECT_EQ(i, 19);
  // Manually terminate the pipeline
  iter->Stop();
}
599 
// Filter with an explicit input column: after Decode + Resize, rows are filtered on "label" with
// Predicate1 (label == 3). Exactly one row with label 3 is expected to remain.
TEST_F(MindDataTestPipeline, TestFilterSuccess1) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFilterSuccess1.";
  // Test basic filter api with specific predicate to judge if label is equal to 3

  // Create a TFRecord Dataset
  std::string data_file = datasets_root_path_ + "/test_tf_file_3_images/train-0000-of-0001.data";
  std::string schema_file = datasets_root_path_ + "/test_tf_file_3_images/datasetSchema.json";
  std::shared_ptr<Dataset> ds = TFRecord({data_file}, schema_file, {"image", "label"}, 0, ShuffleMode::kFalse);
  EXPECT_NE(ds, nullptr);

  // Create objects for the tensor ops
  std::shared_ptr<TensorTransform> decode_op = std::make_shared<vision::Decode>(true);
  EXPECT_NE(decode_op, nullptr);

  // Built with make_shared like decode_op above, instead of a raw `new` handed to shared_ptr
  std::shared_ptr<TensorTransform> resize_op = std::make_shared<vision::Resize>(std::vector<int32_t>{64, 64});
  EXPECT_NE(resize_op, nullptr);

  // Create a Map operation on ds
  ds = ds->Map({decode_op, resize_op});
  EXPECT_NE(ds, nullptr);

  // Create a Filter operation on ds
  ds = ds->Filter(Predicate1, {"label"});
  EXPECT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // iterate over the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // Labels of the rows that passed the filter, in arrival order
  std::vector<uint64_t> label_list;
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto label = row["label"];

    std::shared_ptr<Tensor> de_label;
    uint64_t label_value;
    ASSERT_OK(Tensor::CreateFromMSTensor(label, &de_label));
    ASSERT_OK(de_label->GetItemAt(&label_value, {0}));
    label_list.push_back(label_value);

    ASSERT_OK(iter->GetNextRow(&row));
  }

  // Only 1 row whose label is equal to 3
  EXPECT_EQ(i, 1);
  EXPECT_EQ(label_list.at(0), 3);

  // Manually terminate the pipeline
  iter->Stop();
}
656 
// Filter without input_columns: Predicate2 (label > 1) is applied to a TFRecord dataset and the
// two surviving rows are expected to carry labels 2 and 3, in that order.
TEST_F(MindDataTestPipeline, TestFilterSuccess2) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFilterSuccess2.";

  // Source: TFRecord file with its schema, columns "image" and "label", no shuffling
  const std::string tf_data = datasets_root_path_ + "/test_tf_file_3_images/train-0000-of-0001.data";
  const std::string tf_schema = datasets_root_path_ + "/test_tf_file_3_images/datasetSchema.json";
  std::shared_ptr<Dataset> ds = TFRecord({tf_data}, tf_schema, {"image", "label"}, 0, ShuffleMode::kFalse);
  EXPECT_NE(ds, nullptr);

  // Apply the predicate with the default input columns
  ds = ds->Filter(Predicate2);
  EXPECT_NE(ds, nullptr);

  // Creating the iterator builds and launches the execution tree
  auto iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Pull rows until an empty row signals the end of data, recording each label
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  std::vector<uint64_t> seen_labels;
  uint64_t row_count = 0;
  while (!row.empty()) {
    ++row_count;
    auto ms_label = row["label"];

    std::shared_ptr<Tensor> label_tensor;
    ASSERT_OK(Tensor::CreateFromMSTensor(ms_label, &label_tensor));
    uint64_t label_value;
    ASSERT_OK(label_tensor->GetItemAt(&label_value, {0}));
    seen_labels.push_back(label_value);

    ASSERT_OK(iter->GetNextRow(&row));
  }

  // There are 2 rows whose label is more than 1
  EXPECT_EQ(row_count, 2);
  EXPECT_EQ(seen_labels.at(0), 2);
  EXPECT_EQ(seen_labels.at(1), 3);

  // Shut the pipeline down explicitly
  iter->Stop();
}
703 
// Negative case: Filter receives an empty (nullptr) std::function as predicate, so iterator
// creation is expected to return nullptr.
TEST_F(MindDataTestPipeline, TestFilterFail1) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFilterFail1.";

  // Source: TFRecord file with its schema, columns "image" and "label", no shuffling
  const std::string tf_data = datasets_root_path_ + "/test_tf_file_3_images/train-0000-of-0001.data";
  const std::string tf_schema = datasets_root_path_ + "/test_tf_file_3_images/datasetSchema.json";
  std::shared_ptr<Dataset> ds = TFRecord({tf_data}, tf_schema, {"image", "label"}, 0, ShuffleMode::kFalse);
  EXPECT_NE(ds, nullptr);

  // A predicate object that holds no callable
  std::function<MSTensorVec(MSTensorVec)> empty_predicate = nullptr;

  ds = ds->Filter(empty_predicate);
  EXPECT_NE(ds, nullptr);

  // Expect failure: invalid Filter input with nullptr predicate
  auto iter = ds->CreateIterator();
  EXPECT_EQ(iter, nullptr);
}
725 
// Negative case: Filter is given an input column name that does not exist. The iterator is still
// created, but fetching a row is expected to return an error and no rows are produced.
TEST_F(MindDataTestPipeline, TestFilterFail2) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFilterFail2.";

  // Source: TFRecord file with its schema, columns "image" and "label", no shuffling
  const std::string tf_data = datasets_root_path_ + "/test_tf_file_3_images/train-0000-of-0001.data";
  const std::string tf_schema = datasets_root_path_ + "/test_tf_file_3_images/datasetSchema.json";
  std::shared_ptr<Dataset> ds = TFRecord({tf_data}, tf_schema, {"image", "label"}, 0, ShuffleMode::kFalse);
  EXPECT_NE(ds, nullptr);

  // Filter on a column the dataset does not have
  ds = ds->Filter(Predicate1, {"not_exist"});
  EXPECT_NE(ds, nullptr);

  // Creating the iterator builds and launches the execution tree
  auto iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Every fetch is expected to report an error
  std::unordered_map<std::string, mindspore::MSTensor> row;
  EXPECT_ERROR(iter->GetNextRow(&row));

  uint64_t fetched_rows = 0;
  while (!row.empty()) {
    ++fetched_rows;
    EXPECT_ERROR(iter->GetNextRow(&row));
  }

  // Expect failure: column check fail and return nothing
  EXPECT_EQ(fetched_rows, 0);

  // Shut the pipeline down explicitly
  iter->Stop();
}
761 
// Negative case: Filter is given an input column list whose only entry is an empty string, so
// iterator creation is expected to return nullptr.
TEST_F(MindDataTestPipeline, TestFilterFail3) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFilterFail3.";

  // Source: TFRecord file with its schema, columns "image" and "label", no shuffling
  const std::string tf_data = datasets_root_path_ + "/test_tf_file_3_images/train-0000-of-0001.data";
  const std::string tf_schema = datasets_root_path_ + "/test_tf_file_3_images/datasetSchema.json";
  std::shared_ptr<Dataset> ds = TFRecord({tf_data}, tf_schema, {"image", "label"}, 0, ShuffleMode::kFalse);
  EXPECT_NE(ds, nullptr);

  // Column name is the empty string
  ds = ds->Filter(Predicate1, {""});
  EXPECT_NE(ds, nullptr);

  // Expect failure: invalid Filter input with empty string of column name
  auto iter = ds->CreateIterator();
  EXPECT_EQ(iter, nullptr);
}
781 
// ImageFolder with RandomSampler(false, 10), followed by Repeat(2) and Batch(2).
// Expectation: the iterator yields exactly 10 rows.
TEST_F(MindDataTestPipeline, TestImageFolderBatchAndRepeat) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestImageFolderBatchAndRepeat.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal checks throughout: each pointer is dereferenced right after it is checked
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 2;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // iterate over the dataset and get each row; an empty row ends the loop
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (!row.empty()) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 10);

  // Manually terminate the pipeline
  iter->Stop();
}
822 
// GetDatasetSize() on ImageFolder with RandomSampler(false, 10), followed by Repeat(2) and Batch(2).
// Expectation: the reported size is 10.
TEST_F(MindDataTestPipeline, TestPipelineGetDatasetSize) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestPipelineGetDatasetSize.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal checks: ds is dereferenced right after each check
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 2;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  EXPECT_EQ(ds->GetDatasetSize(), 10);
}
843 
// GetDatasetSize() in a distributed scenario where num_per_shard is more than num_samples.
// Expectation: both GetDatasetSize() and the number of iterated rows are 10.
TEST_F(MindDataTestPipeline, TestDistributedGetDatasetSize1) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedGetDatasetSize1.";
  // Test get dataset size in distributed scenario when num_per_shard is more than num_samples

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<DistributedSampler>(4, 0, false, 10));
  // Fatal check: ds is dereferenced below
  ASSERT_NE(ds, nullptr);

  // num_per_shard is equal to 44/4 = 11 which is more than num_samples = 10, so the output is 10
  EXPECT_EQ(ds->GetDatasetSize(), 10);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: iter is dereferenced below
  ASSERT_NE(iter, nullptr);

  // iterate over the dataset and get each row; an empty row ends the loop
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (!row.empty()) {
    i++;
    ASSERT_OK(iter->GetNextRow(&row));
  }

  // The value of i should be equal to the result of get dataset size
  EXPECT_EQ(i, 10);

  // Manually terminate the pipeline, as the other iterating tests in this file do
  iter->Stop();
}
874 
// GetDatasetSize() in a distributed scenario where num_per_shard is less than num_samples.
// Expectation: both GetDatasetSize() and the number of iterated rows are 11.
TEST_F(MindDataTestPipeline, TestDistributedGetDatasetSize2) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestDistributedGetDatasetSize2.";
  // Test get dataset size in distributed scenario when num_per_shard is less than num_samples

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<DistributedSampler>(4, 0, false, 15));
  // Fatal check: ds is dereferenced below
  ASSERT_NE(ds, nullptr);

  // num_per_shard is equal to 44/4 = 11 which is less than num_samples = 15, so the output is 11
  EXPECT_EQ(ds->GetDatasetSize(), 11);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: iter is dereferenced below
  ASSERT_NE(iter, nullptr);

  // iterate over the dataset and get each row; an empty row ends the loop
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (!row.empty()) {
    i++;
    ASSERT_OK(iter->GetNextRow(&row));
  }

  // The value of i should be equal to the result of get dataset size
  EXPECT_EQ(i, 11);

  // Manually terminate the pipeline, as the other iterating tests in this file do
  iter->Stop();
}
905 
// ImageFolder with RandomSampler(false, 10), then Repeat(2), Map(RandomVerticalFlip), Project({"image"}), Batch(1).
// Expectation: the iterator yields exactly 20 rows.
TEST_F(MindDataTestPipeline, TestProjectMap) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestProjectMap.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal checks on ds/iter: each pointer is dereferenced right after it is checked
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create objects for the tensor ops
  std::shared_ptr<TensorTransform> random_vertical_flip_op = std::make_shared<vision::RandomVerticalFlip>(0.5);
  EXPECT_NE(random_vertical_flip_op, nullptr);

  // Create a Map operation on ds
  ds = ds->Map({random_vertical_flip_op}, {}, {}, {"image", "label"});
  ASSERT_NE(ds, nullptr);

  // Create a Project operation on ds
  std::vector<std::string> column_project = {"image"};
  ds = ds->Project(column_project);
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // iterate over the dataset and get each row; an empty row ends the loop
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (!row.empty()) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 20);

  // Manually terminate the pipeline
  iter->Stop();
}
959 
// Project with the same column name listed twice.
// Expectation: the Project node is still created, but CreateIterator() returns nullptr.
TEST_F(MindDataTestPipeline, TestProjectDuplicateColumnFail) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestProjectDuplicateColumnFail.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 3));
  // Fatal checks on ds: it is dereferenced right after each check
  ASSERT_NE(ds, nullptr);

  // Create objects for the tensor ops
  std::shared_ptr<TensorTransform> random_vertical_flip_op = std::make_shared<vision::RandomVerticalFlip>(0.5);
  EXPECT_NE(random_vertical_flip_op, nullptr);

  // Create a Map operation on ds
  ds = ds->Map({random_vertical_flip_op}, {}, {}, {"image", "label"});
  ASSERT_NE(ds, nullptr);

  // Column list for the Project operation; "image" is intentionally duplicated
  std::vector<std::string> column_project = {"image", "image"};

  // Create a Project operation on ds
  ds = ds->Project(column_project);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Expect failure: duplicate project op column name
  EXPECT_EQ(iter, nullptr);
}
988 
// Map with a duplicated name in the input, output and project column lists (one case each).
// Expectation: every Map node is created, but CreateIterator() returns nullptr in all three cases.
TEST_F(MindDataTestPipeline, TestMapDuplicateColumnFail) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMapDuplicateColumnFail.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal check: ds is dereferenced by every Map call below
  ASSERT_NE(ds, nullptr);

  // Create objects for the tensor ops
  std::shared_ptr<TensorTransform> random_vertical_flip_op = std::make_shared<vision::RandomVerticalFlip>(0.5);
  EXPECT_NE(random_vertical_flip_op, nullptr);

  // Create a Map operation on ds
  auto ds1 = ds->Map({random_vertical_flip_op}, {"image", "image"}, {}, {});
  ASSERT_NE(ds1, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
  // Expect failure: duplicate Map op input column name
  EXPECT_EQ(iter1, nullptr);

  // Create a Map operation on ds
  auto ds2 = ds->Map({random_vertical_flip_op}, {}, {"label", "label"}, {});
  ASSERT_NE(ds2, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
  // Expect failure: duplicate Map op output column name
  EXPECT_EQ(iter2, nullptr);

  // Create a Map operation on ds
  auto ds3 = ds->Map({random_vertical_flip_op}, {}, {}, {"image", "image"});
  ASSERT_NE(ds3, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter3 = ds3->CreateIterator();
  // Expect failure: duplicate Map op project column name
  EXPECT_EQ(iter3, nullptr);
}
1028 
// Map with a null TensorTransform in its operation list.
// Expectation: the Map node is still created, but CreateIterator() returns nullptr.
TEST_F(MindDataTestPipeline, TestMapNullOperation) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMapNullOperation.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal check: ds is dereferenced by the Map call below
  ASSERT_NE(ds, nullptr);

  // Create a Map operation on ds
  std::shared_ptr<TensorTransform> operation = nullptr;
  auto ds1 = ds->Map({operation}, {"image"}, {}, {});
  // Fatal check: ds1 is dereferenced by CreateIterator()
  ASSERT_NE(ds1, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
  // Expect failure: Operation is nullptr
  EXPECT_EQ(iter1, nullptr);
}
1047 
// Map(Resize({30, 30})) with project columns {"image"} on a twice-repeated 10-sample ImageFolder.
// Expectation: rows contain "image" but not "label", image.Shape()[0] is 30, and 20 rows are produced.
TEST_F(MindDataTestPipeline, TestProjectMapAutoInjection) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline.TestProjectMapAutoInjection";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal checks on ds/iter: each pointer is dereferenced right after it is checked
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create objects for the tensor ops
  std::shared_ptr<TensorTransform> resize_op(new vision::Resize({30, 30}));
  EXPECT_NE(resize_op, nullptr);

  // Create a Map operation on ds
  // {"image"} is the project columns. This will trigger auto injection of ProjectOp after MapOp.
  ds = ds->Map({resize_op}, {}, {}, {"image"});
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // iterate over the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // Column checks are made on the first row only
  // 'label' is dropped during the project op
  EXPECT_EQ(row.find("label"), row.end());
  // 'image' column should still exist
  EXPECT_NE(row.find("image"), row.end());

  uint64_t i = 0;
  while (!row.empty()) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    EXPECT_EQ(image.Shape()[0], 30);
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 20);

  // Manually terminate the pipeline
  iter->Stop();
}
1098 
// Rename with input and output column lists of different sizes.
// Expectation: the Rename node is still created, but CreateIterator() returns nullptr.
TEST_F(MindDataTestPipeline, TestRenameFail1) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRenameFail1.";
  // We expect this test to fail because input and output in Rename are not the same size

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal checks: ds is dereferenced right after each check
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create a Rename operation on ds
  ds = ds->Rename({"image", "label"}, {"col2"});
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Expect failure: invalid Op input
  EXPECT_EQ(iter, nullptr);
}
1122 
// Rename with an empty string among the output column names.
// Expectation: the Rename node is still created, but CreateIterator() returns nullptr.
TEST_F(MindDataTestPipeline, TestRenameFail2) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRenameFail2.";
  // We expect this test to fail because input or output column name is empty

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal checks: ds is dereferenced right after each check
  ASSERT_NE(ds, nullptr);

  // Create a Rename operation on ds
  ds = ds->Rename({"image", "label"}, {"col2", ""});
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Expect failure: invalid Op input
  EXPECT_EQ(iter, nullptr);
}
1141 
// Rename with a duplicated input column name, then with a duplicated output column name.
// Expectation: both Rename nodes are created, but CreateIterator() returns nullptr in each case.
TEST_F(MindDataTestPipeline, TestRenameFail3) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRenameFail3.";
  // We expect this test to fail because duplicate column name

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal check: ds is dereferenced by both Rename calls below
  ASSERT_NE(ds, nullptr);

  // Create a Rename operation on ds (duplicate input column)
  auto ds1 = ds->Rename({"image", "image"}, {"col1", "col2"});
  ASSERT_NE(ds1, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
  // Expect failure: invalid Op input
  EXPECT_EQ(iter1, nullptr);

  // Create a Rename operation on ds (duplicate output column)
  auto ds2 = ds->Rename({"image", "label"}, {"col1", "col1"});
  ASSERT_NE(ds2, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
  // Expect failure: invalid Op input
  EXPECT_EQ(iter2, nullptr);
}
1169 
// Rename {"image", "label"} to {"col1", "col2"} on a twice-repeated 10-sample ImageFolder, then Batch(1).
// Expectation: the first row exposes only the new names, and 20 rows are produced.
TEST_F(MindDataTestPipeline, TestRenameSuccess) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRenameSuccess.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal checks on ds/iter: each pointer is dereferenced right after it is checked
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create a Rename operation on ds
  ds = ds->Rename({"image", "label"}, {"col1", "col2"});
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // iterate over the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // Column-name checks are made on the first row only
  EXPECT_NE(row.find("col1"), row.end());
  EXPECT_NE(row.find("col2"), row.end());
  EXPECT_EQ(row.find("image"), row.end());
  EXPECT_EQ(row.find("label"), row.end());

  uint64_t i = 0;
  while (!row.empty()) {
    i++;
    auto image = row["col1"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 20);

  // Manually terminate the pipeline
  iter->Stop();
}
1219 
// Repeat() with its default count, followed by Batch(1).
// Expectation: at least 100 rows can be read; the test stops reading after 100.
TEST_F(MindDataTestPipeline, TestRepeatDefault) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRepeatDefault.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal checks on ds/iter: each pointer is dereferenced right after it is checked
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  // Default value of repeat count is -1, expected to repeat infinitely
  ds = ds->Repeat();
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // iterate over the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // The pipeline is expected to be endless, so stop manually once 100 rows have been counted
  const uint64_t max_rows = 100;
  uint64_t i = 0;
  while (!row.empty() && i < max_rows) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 100);
  // Manually terminate the pipeline
  iter->Stop();
}
1262 
// Repeat(1) followed by Batch(1) on a 10-sample ImageFolder.
// Expectation: the iterator yields exactly 10 rows.
TEST_F(MindDataTestPipeline, TestRepeatOne) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRepeatOne.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal checks on ds/iter: each pointer is dereferenced right after it is checked
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 1;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // iterate over the dataset and get each row; an empty row ends the loop
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));
  uint64_t i = 0;
  while (!row.empty()) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 10);
  // Manually terminate the pipeline
  iter->Stop();
}
1301 
// Repeat with a count of 0.
// Expectation: the Repeat node is still created, but CreateIterator() returns nullptr.
TEST_F(MindDataTestPipeline, TestRepeatFail1) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRepeatFail1.";
  // This case is expected to fail because the repeat count is 0.

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal checks: ds is dereferenced right after each check
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 0;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Expect failure: invalid Op input
  EXPECT_EQ(iter, nullptr);
}
1320 
// Repeat with a count of -2.
// Expectation: the Repeat node is still created, but CreateIterator() returns nullptr.
TEST_F(MindDataTestPipeline, TestRepeatFail2) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRepeatFail2.";
  // This case is expected to fail because the repeat count is invalid (<-1 && !=0).

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal checks: ds is dereferenced right after each check
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = -2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Expect failure: invalid Op input
  EXPECT_EQ(iter, nullptr);
}
1340 
// Shuffle(10), Repeat(2) and Batch(2) on a 10-sample ImageFolder.
// Expectation: the iterator yields exactly 10 rows.
TEST_F(MindDataTestPipeline, TestShuffleDataset) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestShuffleDataset.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal checks on ds/iter: each pointer is dereferenced right after it is checked
  ASSERT_NE(ds, nullptr);

  // Create a Shuffle operation on ds
  int32_t shuffle_size = 10;
  ds = ds->Shuffle(shuffle_size);
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 2;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // iterate over the dataset and get each row; an empty row ends the loop
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (!row.empty()) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 10);

  // Manually terminate the pipeline
  iter->Stop();
}
1386 
// Skip(3) on a 10-sample ImageFolder.
// Expectation: the iterator yields 10 - 3 = 7 rows.
TEST_F(MindDataTestPipeline, TestSkipDataset) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSkipDataset.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal checks on ds/iter: each pointer is dereferenced right after it is checked
  ASSERT_NE(ds, nullptr);

  // Create a Skip operation on ds
  int32_t count = 3;
  ds = ds->Skip(count);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // iterate over the dataset and get each row; an empty row ends the loop
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (!row.empty()) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }
  MS_LOG(INFO) << "Number of rows: " << i;

  // Expect 10-3=7 rows
  EXPECT_EQ(i, 7);

  // Manually terminate the pipeline
  iter->Stop();
}
1424 
// Skip(0), Project({"image"}), Take(-1) and Repeat(1) chained on a 6-sample ImageFolder.
// Expectation: the iterator yields 6 rows.
TEST_F(MindDataTestPipeline, TestSkipTakeRepeat) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSkipTakeRepeat.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 6));
  // Fatal checks on ds/iter: each pointer is dereferenced right after it is checked
  ASSERT_NE(ds, nullptr);

  // Create a Skip operation on ds
  int32_t count = 0;
  ds = ds->Skip(count);
  ASSERT_NE(ds, nullptr);

  // Create a Project operation on ds
  std::vector<std::string> column_project = {"image"};
  ds = ds->Project(column_project);
  ASSERT_NE(ds, nullptr);

  // Add a Take(-1)
  ds = ds->Take(-1);
  ASSERT_NE(ds, nullptr);

  // Add a Repeat(1)
  ds = ds->Repeat(1);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // iterate over the dataset and get each row; an empty row ends the loop
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (!row.empty()) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }
  MS_LOG(INFO) << "Number of rows: " << i;

  // Expect 6 rows
  EXPECT_EQ(i, 6);

  // Manually terminate the pipeline
  iter->Stop();
}
1469 
// GetDatasetSize() after Skip(3) on a 10-sample ImageFolder.
// Expectation: the reported size is 7.
TEST_F(MindDataTestPipeline, TestSkipGetDatasetSize) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSkipGetDatasetSize.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal checks: ds is dereferenced right after each check
  ASSERT_NE(ds, nullptr);

  // Create a Skip operation on ds
  int32_t count = 3;
  ds = ds->Skip(count);
  ASSERT_NE(ds, nullptr);

  EXPECT_EQ(ds->GetDatasetSize(), 7);
}
1485 
// Skip with a count of -1.
// Expectation: the Skip node is still created, but CreateIterator() returns nullptr.
TEST_F(MindDataTestPipeline, TestSkipDatasetError1) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSkipDatasetError1.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal checks: ds is dereferenced right after each check
  ASSERT_NE(ds, nullptr);

  // Create a Skip operation on ds with invalid count input
  int32_t count = -1;
  ds = ds->Skip(count);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Expect failure: invalid Skip input
  EXPECT_EQ(iter, nullptr);
}
1504 
// Take() with its default count on a 7-sample ImageFolder.
// Expectation: the iterator yields all 7 rows.
TEST_F(MindDataTestPipeline, TestTakeDatasetDefault) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTakeDatasetDefault.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 7));
  // Fatal checks on ds/iter: each pointer is dereferenced right after it is checked
  ASSERT_NE(ds, nullptr);

  // Create a Take operation on ds, default count = -1
  ds = ds->Take();
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // iterate over the dataset and get each row; an empty row ends the loop
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t i = 0;
  while (!row.empty()) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }
  MS_LOG(INFO) << "Number of rows: " << i;

  // Expect 7 rows
  EXPECT_EQ(i, 7);

  // Manually terminate the pipeline
  iter->Stop();
}
1541 
// GetDatasetSize() after Take(2) on a 7-sample ImageFolder.
// Expectation: the reported size is 2.
TEST_F(MindDataTestPipeline, TestTakeGetDatasetSize) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTakeGetDatasetSize.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 7));
  // Fatal checks: ds is dereferenced right after each check
  ASSERT_NE(ds, nullptr);

  // Create a Take operation on ds with an explicit count of 2
  ds = ds->Take(2);
  ASSERT_NE(ds, nullptr);

  EXPECT_EQ(ds->GetDatasetSize(), 2);
}
1556 
// Take with a count of -5, then with a count of 0.
// Expectation: both Take nodes are created, but CreateIterator() returns nullptr in each case.
TEST_F(MindDataTestPipeline, TestTakeDatasetError1) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTakeDatasetError1.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  // Fatal check: ds is dereferenced by both Take calls below
  ASSERT_NE(ds, nullptr);

  // Create a Take operation on ds with invalid count input
  int32_t count = -5;
  auto ds1 = ds->Take(count);
  ASSERT_NE(ds1, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds1->CreateIterator();
  // Expect failure: invalid Op input
  EXPECT_EQ(iter, nullptr);

  // Create a Take operation on ds with invalid count input
  count = 0;
  auto ds2 = ds->Take(count);
  ASSERT_NE(ds2, nullptr);

  // Create an iterator over the result of the above dataset
  iter = ds2->CreateIterator();
  // Expect failure: invalid Op input
  EXPECT_EQ(iter, nullptr);
}
1585 
// Checks that an ImageFolder pipeline followed by Take(5) yields exactly 5 rows when iterated.
TEST_F(MindDataTestPipeline, TestTakeDatasetNormal) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTakeDatasetNormal.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 8));
  // Fatal check: ds is dereferenced by the next statement
  ASSERT_NE(ds, nullptr);

  // Create a Take operation on ds
  ds = ds->Take(5);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: iter is dereferenced by GetNextRow() and Stop() below
  ASSERT_NE(iter, nullptr);

  // Iterate over the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // An empty row is the loop's end-of-data condition
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }
  MS_LOG(INFO) << "Number of rows: " << i;

  // Expect 5 rows
  EXPECT_EQ(i, 5);

  // Manually terminate the pipeline
  iter->Stop();
}
1622 
// Runs a Mnist -> Repeat -> Map(Resize, CenterCrop) -> Batch pipeline and checks that 40 rows are produced.
TEST_F(MindDataTestPipeline, TestTensorOpsAndMap) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTensorOpsAndMap.";

  // Create a Mnist Dataset
  std::string folder_path = datasets_root_path_ + "/testMnistData/";
  std::shared_ptr<Dataset> ds = Mnist(folder_path, "all", std::make_shared<RandomSampler>(false, 20));
  // Fatal check: ds is dereferenced by the next statement
  ASSERT_NE(ds, nullptr);

  // Create a Repeat operation on ds
  int32_t repeat_num = 2;
  ds = ds->Repeat(repeat_num);
  ASSERT_NE(ds, nullptr);

  // Create objects for the tensor ops
  std::shared_ptr<TensorTransform> resize_op(new vision::Resize({30, 30}));
  EXPECT_NE(resize_op, nullptr);

  std::shared_ptr<TensorTransform> center_crop_op(new vision::CenterCrop({16, 16}));
  EXPECT_NE(center_crop_op, nullptr);

  // Create a Map operation on ds
  ds = ds->Map({resize_op, center_crop_op});
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: iter is dereferenced by GetNextRow() and Stop() below
  ASSERT_NE(iter, nullptr);

  // Iterate over the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // An empty row is the loop's end-of-data condition
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  // Expect 40 rows (presumably 20 sampled rows x 2 repeats with batch size 1 - confirm against sampler semantics)
  EXPECT_EQ(i, 40);

  // Manually terminate the pipeline
  iter->Stop();
}
1674 
// Checks that CreateIterator() returns nullptr when two datasets with identical column names are zipped.
TEST_F(MindDataTestPipeline, TestZipFail) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestZipFail.";
  // We expect iterator creation to fail because both datasets we are zipping have "image" and "label" columns
  // and zip doesn't accept datasets with the same column names

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  EXPECT_NE(ds, nullptr);

  // Create a second ImageFolder Dataset over the same folder
  std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  EXPECT_NE(ds1, nullptr);

  // Create a Zip operation on the datasets
  ds = Zip({ds, ds1});
  // Fatal check: ds is dereferenced by the next statement
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  // Fatal check: ds is dereferenced by CreateIterator() below
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // Expect failure: the zipped datasets share column names
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_EQ(iter, nullptr);
}
1703 
// Checks that CreateIterator() returns nullptr when Zip is given an empty list of datasets.
TEST_F(MindDataTestPipeline, TestZipFail2) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestZipFail2.";
  // This case is expected to fail because the input dataset is empty.

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  EXPECT_NE(ds, nullptr);

  // Create a Zip operation on the datasets
  // Input dataset to zip is empty
  ds = Zip({});
  // Fatal check: ds is dereferenced by CreateIterator() below
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Expect failure: invalid Op input
  EXPECT_EQ(iter, nullptr);
}
1723 
TEST_F(MindDataTestPipeline,TestZipSuccess)1724 TEST_F(MindDataTestPipeline, TestZipSuccess) {
1725   // Testing the member zip() function
1726   MS_LOG(INFO) << "Doing MindDataTestPipeline-TestZipSuccess.";
1727 
1728   // Create an ImageFolder Dataset
1729   std::string folder_path = datasets_root_path_ + "/testPK/data/";
1730   std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
1731   EXPECT_NE(ds, nullptr);
1732 
1733   // Create a Project operation on ds
1734   std::vector<std::string> column_project = {"image"};
1735   ds = ds->Project(column_project);
1736   EXPECT_NE(ds, nullptr);
1737 
1738   // Create an ImageFolder Dataset
1739   std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
1740   EXPECT_NE(ds1, nullptr);
1741 
1742   // Create a Rename operation on ds (so that the 3 datasets we are going to zip have distinct column names)
1743   ds1 = ds1->Rename({"image", "label"}, {"col1", "col2"});
1744   EXPECT_NE(ds1, nullptr);
1745 
1746   folder_path = datasets_root_path_ + "/testCifar10Data/";
1747   std::shared_ptr<Dataset> ds2 = Cifar10(folder_path, "all", std::make_shared<RandomSampler>(false, 10));
1748   EXPECT_NE(ds2, nullptr);
1749 
1750   // Create a Project operation on ds
1751   column_project = {"label"};
1752   ds2 = ds2->Project(column_project);
1753   EXPECT_NE(ds2, nullptr);
1754 
1755   // Create a Zip operation on the datasets
1756   ds = ds->Zip({ds1, ds2});
1757   EXPECT_NE(ds, nullptr);
1758 
1759   // Create a Batch operation on ds
1760   int32_t batch_size = 1;
1761   ds = ds->Batch(batch_size);
1762   EXPECT_NE(ds, nullptr);
1763 
1764   // Create an iterator over the result of the above dataset
1765   // This will trigger the creation of the Execution Tree and launch it.
1766   std::shared_ptr<Iterator> iter = ds->CreateIterator();
1767   EXPECT_NE(iter, nullptr);
1768 
1769   // iterate over the dataset and get each row
1770   std::unordered_map<std::string, mindspore::MSTensor> row;
1771   ASSERT_OK(iter->GetNextRow(&row));
1772 
1773   // Check zipped column names
1774   EXPECT_EQ(row.size(), 4);
1775   EXPECT_NE(row.find("image"), row.end());
1776   EXPECT_NE(row.find("label"), row.end());
1777   EXPECT_NE(row.find("col1"), row.end());
1778   EXPECT_NE(row.find("col2"), row.end());
1779 
1780   uint64_t i = 0;
1781   while (row.size() != 0) {
1782     i++;
1783     auto image = row["image"];
1784     MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
1785     ASSERT_OK(iter->GetNextRow(&row));
1786   }
1787 
1788   EXPECT_EQ(i, 10);
1789 
1790   // Manually terminate the pipeline
1791   iter->Stop();
1792 }
1793 
// Zips three datasets built with sampler counts 2, 3 and 5 and checks that GetDatasetSize() reports 2.
TEST_F(MindDataTestPipeline, TestZipGetDatasetSize) {
  // Testing the member zip() function
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestZipGetDatasetSize.";

  // Create an ImageFolder Dataset
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 2));
  // Fatal check: ds is dereferenced by the next statement
  ASSERT_NE(ds, nullptr);

  // Create a Project operation on ds
  std::vector<std::string> column_project = {"image"};
  ds = ds->Project(column_project);
  ASSERT_NE(ds, nullptr);

  // Create an ImageFolder Dataset
  std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 3));
  ASSERT_NE(ds1, nullptr);

  // Create a Rename operation on ds1 (so that the 3 datasets we are going to zip have distinct column names)
  ds1 = ds1->Rename({"image", "label"}, {"col1", "col2"});
  ASSERT_NE(ds1, nullptr);

  // Create a Cifar10 Dataset
  folder_path = datasets_root_path_ + "/testCifar10Data/";
  std::shared_ptr<Dataset> ds2 = Cifar10(folder_path, "all", std::make_shared<RandomSampler>(false, 5));
  ASSERT_NE(ds2, nullptr);

  // Create a Project operation on ds2
  column_project = {"label"};
  ds2 = ds2->Project(column_project);
  ASSERT_NE(ds2, nullptr);

  // Create a Zip operation on the datasets
  ds = ds->Zip({ds1, ds2});
  // Fatal check: ds is dereferenced by GetDatasetSize() below
  ASSERT_NE(ds, nullptr);

  // Expect 2 (presumably the size of the smallest zipped input - confirm against Zip semantics)
  EXPECT_EQ(ds->GetDatasetSize(), 2);
}
1831 
// Zips two ImageFolder datasets through the free Zip() function after renaming one side's columns,
// then checks the zipped column names and the number of rows produced.
TEST_F(MindDataTestPipeline, TestZipSuccess2) {
  // Testing the static zip() function
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestZipSuccess2.";

  // Create two ImageFolder Datasets over the same folder
  std::string folder_path = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 9));
  // Fatal check: ds is dereferenced by Rename() below
  ASSERT_NE(ds, nullptr);
  std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
  EXPECT_NE(ds2, nullptr);

  // Create a Rename operation on ds (so that the 2 datasets we are going to zip have distinct column names)
  ds = ds->Rename({"image", "label"}, {"col1", "col2"});
  EXPECT_NE(ds, nullptr);

  // Create a Zip operation on the datasets
  ds = Zip({ds, ds2});
  // Fatal check: ds is dereferenced by the next statement
  ASSERT_NE(ds, nullptr);

  // Create a Batch operation on ds
  int32_t batch_size = 1;
  ds = ds->Batch(batch_size);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: iter is dereferenced by GetNextRow() and Stop() below
  ASSERT_NE(iter, nullptr);

  // Iterate over the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // Check zipped column names
  EXPECT_EQ(row.size(), 4);
  EXPECT_NE(row.find("image"), row.end());
  EXPECT_NE(row.find("label"), row.end());
  EXPECT_NE(row.find("col1"), row.end());
  EXPECT_NE(row.find("col2"), row.end());

  // An empty row is the loop's end-of-data condition
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  // Expect 9 rows
  EXPECT_EQ(i, 9);

  // Manually terminate the pipeline
  iter->Stop();
}
1885 
// Exercises num_workers validation on an ImageFolder dataset: values of -1, INT32_MAX and
// (cpu core count + 1) must make CreateIterator() return nullptr, while 1 and the cpu core count
// must pass ValidateParams().
TEST_F(MindDataTestPipeline, TestNumWorkersValidate) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestNumWorkersValidate.";

  // Build the ImageFolder dataset used by every check below
  std::string image_dir = datasets_root_path_ + "/testPK/data/";
  std::shared_ptr<Dataset> ds = ImageFolder(image_dir, false, std::make_shared<SequentialSampler>(0, 1));

  // Every following statement dereferences ds, so stop the test right away if it is null
  ASSERT_NE(ds, nullptr);

  // num_workers = -1 must be rejected
  EXPECT_EQ(ds->SetNumWorkers(-1)->CreateIterator(), nullptr);

  // an extremely large num_workers must be rejected as well
  EXPECT_EQ(ds->SetNumWorkers(INT32_MAX)->CreateIterator(), nullptr);

  int32_t core_count = GlobalContext::config_manager()->num_cpu_threads();

  // The remaining checks need a usable core count; skip them when it could not be obtained
  if (core_count <= 0) {
    return;
  }

  // one worker more than the core count must be rejected
  EXPECT_EQ(ds->SetNumWorkers(core_count + 1)->CreateIterator(), nullptr);
  // both the core count itself and a single worker must validate successfully
  ASSERT_OK(ds->SetNumWorkers(core_count)->IRNode()->ValidateParams());
  ASSERT_OK(ds->SetNumWorkers(1)->IRNode()->ValidateParams());
}
1912