• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020-2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "common/common.h"
17 #include "minddata/dataset/include/dataset/datasets.h"
18 
19 using namespace mindspore::dataset;
20 using mindspore::dataset::Tensor;
21 
22 class MindDataTestPipeline : public UT::DatasetOpTesting {
23  protected:
24 };
25 
/// Feature: Manifest dataset
/// Description: Build a Manifest dataset from "cpp.json" passing only the file path, then iterate over all rows
/// Expectation: Every GetNextRow call succeeds and exactly 2 rows are produced
TEST_F(MindDataTestPipeline, TestManifestBasic) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestManifestBasic.";

  std::string file_path = datasets_root_path_ + "/testManifestData/cpp.json";
  // Create a Manifest Dataset
  std::shared_ptr<Dataset> ds = Manifest(file_path);
  // Fatal check: ds is dereferenced right below, so a null dataset must stop the test here.
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: iter is dereferenced by every GetNextRow call below.
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // An empty row marks the end of the data.
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 2);

  // Manually terminate the pipeline
  iter->Stop();
}
56 
/// Feature: Manifest dataset
/// Description: Build two Manifest datasets from "cpp.json", apply Repeat (2x and 3x) and Project("image")
///     to each, Concat them, then iterate over all rows
/// Expectation: Every GetNextRow call succeeds and exactly 10 rows are produced
TEST_F(MindDataTestPipeline, TestManifestBasicWithPipeline) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestManifestBasicWithPipeline.";

  std::string file_path = datasets_root_path_ + "/testManifestData/cpp.json";
  // Create two Manifest Dataset
  std::shared_ptr<Dataset> ds1 = Manifest(file_path);
  std::shared_ptr<Dataset> ds2 = Manifest(file_path);
  // Fatal checks throughout: each dataset pointer is dereferenced by the next pipeline step,
  // so a null result must stop the test instead of causing a null dereference.
  ASSERT_NE(ds1, nullptr);
  ASSERT_NE(ds2, nullptr);

  // Create two Repeat operation on ds
  int32_t repeat_num = 2;
  ds1 = ds1->Repeat(repeat_num);
  ASSERT_NE(ds1, nullptr);
  repeat_num = 3;
  ds2 = ds2->Repeat(repeat_num);
  ASSERT_NE(ds2, nullptr);

  // Create two Project operation on ds
  std::vector<std::string> column_project = {"image"};
  ds1 = ds1->Project(column_project);
  ASSERT_NE(ds1, nullptr);
  ds2 = ds2->Project(column_project);
  ASSERT_NE(ds2, nullptr);

  // Create a Concat operation on the ds
  ds1 = ds1->Concat({ds2});
  ASSERT_NE(ds1, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds1->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // An empty row marks the end of the data.
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  // 10 = 2 * 2 + 2 * 3, assuming the manifest yields 2 rows per pass.
  EXPECT_EQ(i, 10);

  // Manually terminate the pipeline
  iter->Stop();
}
108 
/// Feature: Manifest dataset
/// Description: Query GetDatasetSize, GetNumClasses, GetColumnNames and GetClassIndexing on datasets built
///     from "cpp.json" and "cpp2.json" without creating an iterator
/// Expectation: cpp.json reports size 2, 2 classes (cat=0, dog=1) and columns {"image", "label"};
///     cpp2.json reports size 4 and 3 classes (cat=0, dog=1, flower=2)
TEST_F(MindDataTestPipeline, TestManifestGetters) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestManifestGetters.";

  std::string file_path1 = datasets_root_path_ + "/testManifestData/cpp.json";
  std::string file_path2 = datasets_root_path_ + "/testManifestData/cpp2.json";
  // Create a Manifest Dataset
  std::shared_ptr<Dataset> ds1 = Manifest(file_path1);
  std::shared_ptr<Dataset> ds2 = Manifest(file_path2);
  std::vector<std::string> column_names = {"image", "label"};

  // Fatal checks: both datasets are dereferenced by the getter calls below.
  ASSERT_NE(ds1, nullptr);
  ASSERT_NE(ds2, nullptr);

  EXPECT_EQ(ds1->GetDatasetSize(), 2);
  EXPECT_EQ(ds1->GetNumClasses(), 2);
  EXPECT_EQ(ds1->GetColumnNames(), column_names);

  EXPECT_EQ(ds2->GetDatasetSize(), 4);
  EXPECT_EQ(ds2->GetNumClasses(), 3);

  std::vector<std::pair<std::string, std::vector<int32_t>>> class_index1 = ds1->GetClassIndexing();
  // Fatal size check: the entries are indexed directly below.
  ASSERT_EQ(class_index1.size(), 2);
  for (const auto &entry : class_index1) {
    // Each entry's index vector is read at position 0 below.
    ASSERT_FALSE(entry.second.empty());
  }
  EXPECT_EQ(class_index1[0].first, "cat");
  EXPECT_EQ(class_index1[0].second[0], 0);
  EXPECT_EQ(class_index1[1].first, "dog");
  EXPECT_EQ(class_index1[1].second[0], 1);

  std::vector<std::pair<std::string, std::vector<int32_t>>> class_index2 = ds2->GetClassIndexing();
  // Fatal size check: the entries are indexed directly below.
  ASSERT_EQ(class_index2.size(), 3);
  for (const auto &entry : class_index2) {
    // Each entry's index vector is read at position 0 below.
    ASSERT_FALSE(entry.second.empty());
  }
  EXPECT_EQ(class_index2[0].first, "cat");
  EXPECT_EQ(class_index2[0].second[0], 0);
  EXPECT_EQ(class_index2[1].first, "dog");
  EXPECT_EQ(class_index2[1].second[0], 1);
  EXPECT_EQ(class_index2[2].first, "flower");
  EXPECT_EQ(class_index2[2].second[0], 2);
}
144 
/// Feature: Manifest dataset
/// Description: Build a Manifest dataset from "cpp.json" with usage "train", a RandomSampler, an empty
///     class-index map and decode=true, then iterate over all rows
/// Expectation: Every image tensor has rank greater than 1 and exactly 2 rows are produced
TEST_F(MindDataTestPipeline, TestManifestDecode) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestManifestDecode.";

  std::string file_path = datasets_root_path_ + "/testManifestData/cpp.json";
  // Create a Manifest Dataset
  std::shared_ptr<Dataset> ds = Manifest(file_path, "train", std::make_shared<RandomSampler>(), {}, true);
  // Fatal check: ds is dereferenced right below, so a null dataset must stop the test here.
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: iter is dereferenced by every GetNextRow call below.
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // An empty row marks the end of the data.
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    auto shape = image.Shape();
    MS_LOG(INFO) << "Tensor image shape size: " << shape.size();
    MS_LOG(INFO) << "Tensor image shape: " << shape;
    EXPECT_GT(shape.size(), 1);  // Verify decode=true took effect
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 2);

  // Manually terminate the pipeline
  iter->Stop();
}
178 
/// Feature: Manifest dataset
/// Description: Build a Manifest dataset from "cpp.json" with usage "eval", then iterate over all rows
/// Expectation: Every GetNextRow call succeeds and exactly 1 row is produced
TEST_F(MindDataTestPipeline, TestManifestEval) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestManifestEval.";

  std::string file_path = datasets_root_path_ + "/testManifestData/cpp.json";
  // Create a Manifest Dataset
  std::shared_ptr<Dataset> ds = Manifest(file_path, "eval");
  // Fatal check: ds is dereferenced right below, so a null dataset must stop the test here.
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: iter is dereferenced by every GetNextRow call below.
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // An empty row marks the end of the data.
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 1);

  // Manually terminate the pipeline
  iter->Stop();
}
209 
/// Feature: Manifest dataset
/// Description: Build a Manifest dataset from "cpp.json" with a user-supplied class-index map
///     (cat=111, dog=222, plus one extra key), check GetClassIndexing, then iterate over all rows
/// Expectation: GetClassIndexing reports only cat=111 and dog=222, every row's label is 111 or 222,
///     and exactly 2 rows are produced
TEST_F(MindDataTestPipeline, TestManifestClassIndex) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestManifestClassIndex.";

  std::string file_path = datasets_root_path_ + "/testManifestData/cpp.json";
  std::map<std::string, int32_t> class_index_map;
  class_index_map["cat"] = 111;                 // custom index for class "cat"
  class_index_map["dog"] = 222;                 // custom index for class "dog"
  class_index_map["wrong folder name"] = 1234;  // expected to be skipped: absent from the class indexing checked below
  std::vector<int64_t> expected_label = {111, 222};

  // Create a Manifest Dataset
  std::shared_ptr<Dataset> ds =
    Manifest(file_path, "train", std::make_shared<RandomSampler>(), class_index_map, true);
  // Fatal check: ds is dereferenced right below, so a null dataset must stop the test here.
  ASSERT_NE(ds, nullptr);

  std::vector<std::pair<std::string, std::vector<int32_t>>> class_index1 = ds->GetClassIndexing();
  // Fatal size check: the entries are indexed directly below.
  ASSERT_EQ(class_index1.size(), 2);
  for (const auto &entry : class_index1) {
    // Each entry's index vector is read at position 0 below.
    ASSERT_FALSE(entry.second.empty());
  }
  EXPECT_EQ(class_index1[0].first, "cat");
  EXPECT_EQ(class_index1[0].second[0], 111);
  EXPECT_EQ(class_index1[1].first, "dog");
  EXPECT_EQ(class_index1[1].second[0], 222);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: iter is dereferenced by every GetNextRow call below.
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // An empty row marks the end of the data.
  uint64_t i = 0;
  int32_t label_idx = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    auto label = row["label"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();

    // Convert the MSTensor label to a dataset Tensor so its scalar value can be read.
    std::shared_ptr<Tensor> de_label;
    ASSERT_OK(Tensor::CreateFromMSTensor(label, &de_label));
    ASSERT_OK(de_label->GetItemAt<int32_t>(&label_idx, {}));
    MS_LOG(INFO) << "Tensor label value: " << label_idx;
    // The label must be one of the custom indices supplied above.
    auto label_it = std::find(expected_label.begin(), expected_label.end(), label_idx);
    EXPECT_NE(label_it, expected_label.end());

    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 2);

  // Manually terminate the pipeline
  iter->Stop();
}
263 
/// Feature: Manifest dataset
/// Description: Build a Manifest dataset from "cpp.json" with usage "train", SequentialSampler(0, 1),
///     an empty class-index map and decode=true, then iterate over all rows
/// Expectation: Every GetNextRow call succeeds and exactly 1 row is produced
TEST_F(MindDataTestPipeline, TestManifestNumSamplers) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestManifestNumSamplers.";

  std::string file_path = datasets_root_path_ + "/testManifestData/cpp.json";
  // Create a Manifest Dataset
  // NOTE(review): SequentialSampler(0, 1) is presumably (start_index=0, num_samples=1) - confirm against the API.
  std::shared_ptr<Dataset> ds = Manifest(file_path, "train", std::make_shared<SequentialSampler>(0, 1), {}, true);
  // Fatal check: ds is dereferenced right below, so a null dataset must stop the test here.
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: iter is dereferenced by every GetNextRow call below.
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // An empty row marks the end of the data.
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 1);

  // Manually terminate the pipeline
  iter->Stop();
}
294 
/// Feature: Manifest dataset
/// Description: Build Manifest datasets from three invalid inputs: a non-existing file, an invalid usage
///     string, and a path made of special characters
/// Expectation: Each dataset object is created (non-null), but CreateIterator returns nullptr for all three
TEST_F(MindDataTestPipeline, TestManifestError) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestManifestError.";

  std::string file_path = datasets_root_path_ + "/testManifestData/cpp.json";
  // Create a Manifest Dataset with non-existing file
  std::shared_ptr<Dataset> ds0 = Manifest("NotExistFile", "train");
  // Fatal check: ds0 is dereferenced right below, so a null dataset must stop the test here.
  ASSERT_NE(ds0, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter0 = ds0->CreateIterator();
  // Expect failure: invalid Manifest input
  EXPECT_EQ(iter0, nullptr);

  // Create a Manifest Dataset with invalid usage
  std::shared_ptr<Dataset> ds1 = Manifest(file_path, "invalid_usage");
  // Fatal check: ds1 is dereferenced right below.
  ASSERT_NE(ds1, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
  // Expect failure: invalid Manifest input
  EXPECT_EQ(iter1, nullptr);

  // Create a Manifest Dataset with invalid string
  std::shared_ptr<Dataset> ds2 = Manifest(":*?\"<>|`&;'", "train");
  // Fatal check: ds2 is dereferenced right below.
  ASSERT_NE(ds2, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
  // Expect failure: invalid Manifest input
  EXPECT_EQ(iter2, nullptr);
}
326 
/// Feature: Manifest dataset
/// Description: Build a Manifest dataset from "cpp.json" with usage "train" and a nullptr sampler
/// Expectation: The dataset object is created (non-null), but CreateIterator returns nullptr
TEST_F(MindDataTestPipeline, TestManifestWithNullSamplerError) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestManifestWithNullSamplerError.";
  std::string file_path = datasets_root_path_ + "/testManifestData/cpp.json";
  // Create a Manifest Dataset
  std::shared_ptr<Dataset> ds = Manifest(file_path, "train", nullptr);
  // Fatal check: ds is dereferenced right below, so a null dataset must stop the test here.
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Expect failure: invalid Manifest input, sampler cannot be nullptr
  EXPECT_EQ(iter, nullptr);
}
339