1 /**
2 * Copyright 2020-2021 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include "common/common.h"
17 #include "minddata/dataset/include/dataset/datasets.h"
18
19 using namespace mindspore::dataset;
20 using mindspore::dataset::Tensor;
21
22 class MindDataTestPipeline : public UT::DatasetOpTesting {
23 protected:
24 };
25
// Feature: Manifest dataset
// Description: Build a Manifest dataset with default arguments and iterate over it
// Expectation: Exactly 2 rows are produced
TEST_F(MindDataTestPipeline, TestManifestBasic) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestManifestBasic.";

  const std::string file_path = datasets_root_path_ + "/testManifestData/cpp.json";

  // Create a Manifest Dataset.
  // ASSERT (not EXPECT): the pointer is dereferenced right below, so a null
  // result must abort the test instead of crashing the whole test binary.
  std::shared_ptr<Dataset> ds = Manifest(file_path);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset.
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row; an empty row marks the end of data.
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t num_rows = 0;
  while (!row.empty()) {
    ++num_rows;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(num_rows, 2);

  // Manually terminate the pipeline
  iter->Stop();
}
56
// Feature: Manifest dataset
// Description: Build two Manifest datasets, apply Repeat and Project to each, then Concat them
// Expectation: Exactly 10 rows are produced (2 rows per pass x (2 + 3) repeats)
TEST_F(MindDataTestPipeline, TestManifestBasicWithPipeline) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestManifestBasicWithPipeline.";

  const std::string file_path = datasets_root_path_ + "/testManifestData/cpp.json";

  // Create two Manifest Datasets from the same file.
  // Every stage below dereferences the previous result, so each null check is
  // fatal (ASSERT) to avoid dereferencing a null pointer after a failure.
  std::shared_ptr<Dataset> ds1 = Manifest(file_path);
  std::shared_ptr<Dataset> ds2 = Manifest(file_path);
  ASSERT_NE(ds1, nullptr);
  ASSERT_NE(ds2, nullptr);

  // Create two Repeat operations: ds1 is repeated twice, ds2 three times.
  int32_t repeat_num = 2;
  ds1 = ds1->Repeat(repeat_num);
  ASSERT_NE(ds1, nullptr);
  repeat_num = 3;
  ds2 = ds2->Repeat(repeat_num);
  ASSERT_NE(ds2, nullptr);

  // Create two Project operations keeping only the "image" column.
  const std::vector<std::string> column_project = {"image"};
  ds1 = ds1->Project(column_project);
  ASSERT_NE(ds1, nullptr);
  ds2 = ds2->Project(column_project);
  ASSERT_NE(ds2, nullptr);

  // Create a Concat operation appending ds2 to ds1.
  ds1 = ds1->Concat({ds2});
  ASSERT_NE(ds1, nullptr);

  // Create an iterator over the result of the above dataset.
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds1->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row; an empty row marks the end of data.
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t num_rows = 0;
  while (!row.empty()) {
    ++num_rows;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(num_rows, 10);

  // Manually terminate the pipeline
  iter->Stop();
}
108
// Feature: Manifest dataset
// Description: Query dataset size, number of classes, column names and class indexing
//              on two Manifest datasets without iterating them
// Expectation: cpp.json reports 2 rows / 2 classes, cpp2.json reports 4 rows / 3 classes,
//              and class names map to consecutive indices starting at 0
TEST_F(MindDataTestPipeline, TestManifestGetters) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestManifestGetters.";

  const std::string file_path1 = datasets_root_path_ + "/testManifestData/cpp.json";
  const std::string file_path2 = datasets_root_path_ + "/testManifestData/cpp2.json";

  // Create the Manifest Datasets
  std::shared_ptr<Dataset> ds1 = Manifest(file_path1);
  std::shared_ptr<Dataset> ds2 = Manifest(file_path2);
  const std::vector<std::string> column_names = {"image", "label"};

  // ASSERT (not EXPECT): the getters below dereference the dataset pointers.
  ASSERT_NE(ds1, nullptr);
  EXPECT_EQ(ds1->GetDatasetSize(), 2);
  EXPECT_EQ(ds1->GetNumClasses(), 2);
  EXPECT_EQ(ds1->GetColumnNames(), column_names);

  ASSERT_NE(ds2, nullptr);
  EXPECT_EQ(ds2->GetDatasetSize(), 4);
  EXPECT_EQ(ds2->GetNumClasses(), 3);

  // The size checks are fatal so that the element accesses that follow can
  // never index past the end of the returned vector.
  std::vector<std::pair<std::string, std::vector<int32_t>>> class_index1 = ds1->GetClassIndexing();
  ASSERT_EQ(class_index1.size(), 2);
  EXPECT_EQ(class_index1[0].first, "cat");
  EXPECT_EQ(class_index1[0].second[0], 0);
  EXPECT_EQ(class_index1[1].first, "dog");
  EXPECT_EQ(class_index1[1].second[0], 1);

  std::vector<std::pair<std::string, std::vector<int32_t>>> class_index2 = ds2->GetClassIndexing();
  ASSERT_EQ(class_index2.size(), 3);
  EXPECT_EQ(class_index2[0].first, "cat");
  EXPECT_EQ(class_index2[0].second[0], 0);
  EXPECT_EQ(class_index2[1].first, "dog");
  EXPECT_EQ(class_index2[1].second[0], 1);
  EXPECT_EQ(class_index2[2].first, "flower");
  EXPECT_EQ(class_index2[2].second[0], 2);
}
144
// Feature: Manifest dataset
// Description: Build a Manifest dataset with a RandomSampler and the decode flag set to true
// Expectation: Exactly 2 rows are produced and every image tensor has more than one dimension
TEST_F(MindDataTestPipeline, TestManifestDecode) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestManifestDecode.";

  const std::string file_path = datasets_root_path_ + "/testManifestData/cpp.json";

  // Create a Manifest Dataset (empty class-index map, decode enabled).
  // ASSERT (not EXPECT): the pointer is dereferenced right below.
  std::shared_ptr<Dataset> ds = Manifest(file_path, "train", std::make_shared<RandomSampler>(), {}, true);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset.
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row; an empty row marks the end of data.
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t num_rows = 0;
  while (!row.empty()) {
    ++num_rows;
    auto image = row["image"];
    auto shape = image.Shape();
    MS_LOG(INFO) << "Tensor image shape size: " << shape.size();
    MS_LOG(INFO) << "Tensor image shape: " << shape;
    EXPECT_GT(shape.size(), 1);  // Verify decode=true took effect
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(num_rows, 2);

  // Manually terminate the pipeline
  iter->Stop();
}
178
// Feature: Manifest dataset
// Description: Build a Manifest dataset with usage "eval" and iterate over it
// Expectation: Exactly 1 row is produced
TEST_F(MindDataTestPipeline, TestManifestEval) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestManifestEval.";

  const std::string file_path = datasets_root_path_ + "/testManifestData/cpp.json";

  // Create a Manifest Dataset.
  // ASSERT (not EXPECT): the pointer is dereferenced right below, so a null
  // result must stop this test rather than crash the test binary.
  std::shared_ptr<Dataset> ds = Manifest(file_path, "eval");
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset.
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row; an empty row marks the end of data.
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t num_rows = 0;
  while (!row.empty()) {
    ++num_rows;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(num_rows, 1);

  // Manually terminate the pipeline
  iter->Stop();
}
209
// Feature: Manifest dataset
// Description: Build a Manifest dataset with a user-supplied class-name -> label mapping
// Expectation: GetClassIndexing reports only "cat" -> 111 and "dog" -> 222, exactly 2 rows
//              are produced, and every row's label is one of the mapped values
TEST_F(MindDataTestPipeline, TestManifestClassIndex) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestManifestClassIndex.";

  const std::string file_path = datasets_root_path_ + "/testManifestData/cpp.json";

  // Custom label assignment passed to Manifest().
  std::map<std::string, int32_t> class_index;
  class_index["cat"] = 111;
  class_index["dog"] = 222;
  // Extra entry; the size check on GetClassIndexing() below expects it to be skipped.
  class_index["wrong folder name"] = 1234;
  const std::vector<int64_t> expected_label = {111, 222};

  // Create a Manifest Dataset.
  // ASSERT (not EXPECT): the pointer is dereferenced right below.
  std::shared_ptr<Dataset> ds = Manifest(file_path, "train", std::make_shared<RandomSampler>(), class_index, true);
  ASSERT_NE(ds, nullptr);

  // The size check is fatal so the element accesses that follow stay in bounds.
  std::vector<std::pair<std::string, std::vector<int32_t>>> class_index1 = ds->GetClassIndexing();
  ASSERT_EQ(class_index1.size(), 2);
  EXPECT_EQ(class_index1[0].first, "cat");
  EXPECT_EQ(class_index1[0].second[0], 111);
  EXPECT_EQ(class_index1[1].first, "dog");
  EXPECT_EQ(class_index1[1].second[0], 222);

  // Create an iterator over the result of the above dataset.
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row; an empty row marks the end of data.
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t num_rows = 0;
  int32_t label_idx = 0;
  while (!row.empty()) {
    ++num_rows;
    auto image = row["image"];
    auto label = row["label"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();

    // Read the label back as an int32 value.
    std::shared_ptr<Tensor> de_label;
    ASSERT_OK(Tensor::CreateFromMSTensor(label, &de_label));
    ASSERT_OK(de_label->GetItemAt<int32_t>(&label_idx, {}));
    MS_LOG(INFO) << "Tensor label value: " << label_idx;

    // The sampler is random, so only membership in the expected set is checked.
    auto label_it = std::find(expected_label.begin(), expected_label.end(), label_idx);
    EXPECT_NE(label_it, expected_label.end());

    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(num_rows, 2);

  // Manually terminate the pipeline
  iter->Stop();
}
263
// Feature: Manifest dataset
// Description: Build a Manifest dataset with SequentialSampler(0, 1) and decode enabled
// Expectation: Exactly 1 row is produced
TEST_F(MindDataTestPipeline, TestManifestNumSamplers) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestManifestNumSamplers.";

  const std::string file_path = datasets_root_path_ + "/testManifestData/cpp.json";

  // Create a Manifest Dataset.
  // NOTE(review): SequentialSampler(0, 1) presumably means start index 0 and
  // one sample - confirm against the sampler's declaration.
  // ASSERT (not EXPECT): the pointer is dereferenced right below.
  std::shared_ptr<Dataset> ds = Manifest(file_path, "train", std::make_shared<SequentialSampler>(0, 1), {}, true);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset.
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row; an empty row marks the end of data.
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  uint64_t num_rows = 0;
  while (!row.empty()) {
    ++num_rows;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(num_rows, 1);

  // Manually terminate the pipeline
  iter->Stop();
}
294
// Feature: Manifest dataset
// Description: Build Manifest datasets with a non-existing file, an invalid usage string,
//              and a path made of special characters
// Expectation: Each dataset object is created, but CreateIterator returns nullptr
TEST_F(MindDataTestPipeline, TestManifestError) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestManifestError.";

  const std::string file_path = datasets_root_path_ + "/testManifestData/cpp.json";

  // Create a Manifest Dataset with non-existing file.
  // The dataset pointers are checked with ASSERT because CreateIterator() is
  // called through them; only the iterator results use non-fatal EXPECT.
  std::shared_ptr<Dataset> ds0 = Manifest("NotExistFile", "train");
  ASSERT_NE(ds0, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter0 = ds0->CreateIterator();
  // Expect failure: invalid Manifest input
  EXPECT_EQ(iter0, nullptr);

  // Create a Manifest Dataset with invalid usage
  std::shared_ptr<Dataset> ds1 = Manifest(file_path, "invalid_usage");
  ASSERT_NE(ds1, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
  // Expect failure: invalid Manifest input
  EXPECT_EQ(iter1, nullptr);

  // Create a Manifest Dataset with invalid string
  std::shared_ptr<Dataset> ds2 = Manifest(":*?\"<>|`&;'", "train");
  ASSERT_NE(ds2, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
  // Expect failure: invalid Manifest input
  EXPECT_EQ(iter2, nullptr);
}
326
// Feature: Manifest dataset
// Description: Build a Manifest dataset passing nullptr as the sampler
// Expectation: The dataset object is created, but CreateIterator returns nullptr
TEST_F(MindDataTestPipeline, TestManifestWithNullSamplerError) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestManifestWithNullSamplerError.";
  const std::string file_path = datasets_root_path_ + "/testManifestData/cpp.json";

  // Create a Manifest Dataset.
  // ASSERT (not EXPECT): CreateIterator() is called through this pointer.
  std::shared_ptr<Dataset> ds = Manifest(file_path, "train", nullptr);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Expect failure: invalid Manifest input, sampler cannot be nullptr
  EXPECT_EQ(iter, nullptr);
}
339