1 /**
2 * Copyright 2021 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include "common/common.h"
17 #include "minddata/dataset/include/dataset/datasets.h"
18
19 using namespace mindspore::dataset;
20 using mindspore::dataset::Tensor;
21
// Test fixture shared by every Flickr pipeline test in this file.
// It adds no members of its own; everything the tests use from the fixture
// (e.g. datasets_root_path_) is presumably inherited from UT::DatasetOpTesting.
class MindDataTestPipeline : public UT::DatasetOpTesting {
 protected:
};
25
// Basic Flickr30k read: builds the dataset from test1.token with default arguments,
// iterates over every row and expects exactly 2 rows to be produced.
TEST_F(MindDataTestPipeline, TestFlickrBasic) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFlickrBasic.";

  std::string dataset_path = datasets_root_path_ + "/testFlickrData/flickr30k/flickr30k-images";
  std::string file_path = datasets_root_path_ + "/testFlickrData/flickr30k/test1.token";

  // Create a Flickr30k Dataset
  std::shared_ptr<Dataset> ds = Flickr(dataset_path, file_path);
  // Fatal check: ds is dereferenced right below, so a null pointer must stop the test here.
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: iter is dereferenced right below.
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // An empty row marks the end of the data.
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 2);

  // Manually terminate the pipeline
  iter->Stop();
}
58
// Flickr30k inside a larger pipeline: two datasets built from test1.token are repeated
// (2x and 3x), projected to the "image" column and concatenated.
// The test expects 10 rows in total from the concatenated pipeline.
TEST_F(MindDataTestPipeline, TestFlickrBasicWithPipeline) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFlickrBasicWithPipeline.";

  std::string dataset_path = datasets_root_path_ + "/testFlickrData/flickr30k/flickr30k-images";
  std::string file_path = datasets_root_path_ + "/testFlickrData/flickr30k/test1.token";

  // Create two Flickr30k Datasets
  // Every pointer below is dereferenced by the next stage, hence the fatal ASSERT_NE checks.
  std::shared_ptr<Dataset> ds1 = Flickr(dataset_path, file_path);
  std::shared_ptr<Dataset> ds2 = Flickr(dataset_path, file_path);
  ASSERT_NE(ds1, nullptr);
  ASSERT_NE(ds2, nullptr);

  // Create a Repeat operation on each dataset
  int32_t repeat_num = 2;
  ds1 = ds1->Repeat(repeat_num);
  ASSERT_NE(ds1, nullptr);
  repeat_num = 3;
  ds2 = ds2->Repeat(repeat_num);
  ASSERT_NE(ds2, nullptr);

  // Create a Project operation on each dataset
  std::vector<std::string> column_project = {"image"};
  ds1 = ds1->Project(column_project);
  ASSERT_NE(ds1, nullptr);
  ds2 = ds2->Project(column_project);
  ASSERT_NE(ds2, nullptr);

  // Create a Concat operation joining ds2 onto ds1
  ds1 = ds1->Concat({ds2});
  ASSERT_NE(ds1, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds1->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // An empty row marks the end of the data.
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 10);

  // Manually terminate the pipeline
  iter->Stop();
}
112
// Getter checks: builds one dataset from test1.token and one from test2.token and
// verifies GetDatasetSize() (expected 2 and 3) and GetColumnNames()
// (expected {"image", "annotation"}) without running the pipeline.
TEST_F(MindDataTestPipeline, TestFlickrGetters) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFlickrGetters.";

  std::string dataset_path = datasets_root_path_ + "/testFlickrData/flickr30k/flickr30k-images";
  std::string file_path1 = datasets_root_path_ + "/testFlickrData/flickr30k/test1.token";
  std::string file_path2 = datasets_root_path_ + "/testFlickrData/flickr30k/test2.token";

  // Create two Flickr30k Datasets
  std::shared_ptr<Dataset> ds1 = Flickr(dataset_path, file_path1);
  std::shared_ptr<Dataset> ds2 = Flickr(dataset_path, file_path2);
  std::vector<std::string> column_names = {"image", "annotation"};

  // Fatal check: the getters below dereference ds1.
  ASSERT_NE(ds1, nullptr);
  EXPECT_EQ(ds1->GetDatasetSize(), 2);
  EXPECT_EQ(ds1->GetColumnNames(), column_names);

  // Fatal check: the getters below dereference ds2.
  ASSERT_NE(ds2, nullptr);
  EXPECT_EQ(ds2->GetDatasetSize(), 3);
  EXPECT_EQ(ds2->GetColumnNames(), column_names);
}
133
// Annotation content check: reads the dataset described by test3.token and compares the
// "annotation" column of every row against a fixed list of five captions.
// The test expects exactly 1 row.
TEST_F(MindDataTestPipeline, TestFlickrAnnotations) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFlickrAnnotations.";

  std::string dataset_path = datasets_root_path_ + "/testFlickrData/flickr30k/flickr30k-images";
  std::string file_path = datasets_root_path_ + "/testFlickrData/flickr30k/test3.token";

  // Create a Flickr30k Dataset
  std::shared_ptr<Dataset> ds = Flickr(dataset_path, file_path);
  // Fatal check: ds is dereferenced right below.
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: iter is dereferenced right below.
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // Build the expected annotation tensor from the reference captions.
  std::shared_ptr<Tensor> a_expect_item;
  std::vector<std::string> annotation_arr;
  annotation_arr.emplace_back("This is a banana.");
  annotation_arr.emplace_back("This is a yellow banana.");
  annotation_arr.emplace_back("This is a banana on the table.");
  annotation_arr.emplace_back("The banana is yellow.");
  annotation_arr.emplace_back("The banana is very big.");

  ASSERT_OK(Tensor::CreateFromVector(annotation_arr, &a_expect_item));
  mindspore::MSTensor expect_item = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(a_expect_item));

  // An empty row marks the end of the data.
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    auto annotation = row["annotation"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
    MS_LOG(INFO) << "Tensor annotation shape: " << annotation.Shape();

    EXPECT_MSTENSOR_EQ(annotation, expect_item);

    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 1);

  // Manually terminate the pipeline
  iter->Stop();
}
179
// Decode check: builds the dataset from test1.token with decode=true and a RandomSampler,
// and verifies that every "image" tensor has more than one dimension.
// The test expects exactly 2 rows.
TEST_F(MindDataTestPipeline, TestFlickrDecode) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFlickrDecode.";

  std::string dataset_path = datasets_root_path_ + "/testFlickrData/flickr30k/flickr30k-images";
  std::string file_path = datasets_root_path_ + "/testFlickrData/flickr30k/test1.token";
  // Create a Flickr30k Dataset
  std::shared_ptr<Dataset> ds = Flickr(dataset_path, file_path, true, std::make_shared<RandomSampler>());
  // Fatal check: ds is dereferenced right below.
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: iter is dereferenced right below.
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // An empty row marks the end of the data.
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    auto shape = image.Shape();
    MS_LOG(INFO) << "Tensor image shape size: " << shape.size();
    MS_LOG(INFO) << "Tensor image shape: " << shape;
    EXPECT_GT(shape.size(), 1);  // Verify decode=true took effect
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 2);

  // Manually terminate the pipeline
  iter->Stop();
}
214
// Sampler check: builds the dataset from test1.token with decode=true and
// SequentialSampler(0, 1), logs the image and annotation shapes of every row,
// and expects exactly 1 row to be produced.
TEST_F(MindDataTestPipeline, TestFlickrNumSamplers) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFlickrNumSamplers.";

  std::string dataset_path = datasets_root_path_ + "/testFlickrData/flickr30k/flickr30k-images";
  std::string file_path = datasets_root_path_ + "/testFlickrData/flickr30k/test1.token";
  // Create a Flickr30k Dataset
  std::shared_ptr<Dataset> ds = Flickr(dataset_path, file_path, true, std::make_shared<SequentialSampler>(0, 1));
  // Fatal check: ds is dereferenced right below.
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: iter is dereferenced right below.
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));

  // An empty row marks the end of the data.
  uint64_t i = 0;
  while (row.size() != 0) {
    i++;
    auto image = row["image"];
    auto annotation = row["annotation"];
    MS_LOG(INFO) << "Tensor image shape: " << image.Shape();

    // Bind the shape once so begin/end always refer to the same container,
    // regardless of whether Shape() returns a reference or a temporary.
    const auto &annotation_shape = annotation.Shape();
    for (const auto &dim : annotation_shape) {
      MS_LOG(INFO) << "annotation shape " << dim;
    }
    ASSERT_OK(iter->GetNextRow(&row));
  }

  EXPECT_EQ(i, 1);

  // Manually terminate the pipeline
  iter->Stop();
}
252
// Invalid-input checks: for a non-existing dataset dir, a non-existing annotation file,
// and paths made of special characters, the dataset object is still expected to be
// created, but CreateIterator() is expected to return nullptr.
TEST_F(MindDataTestPipeline, TestFlickrError) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFlickrError.";

  std::string dataset_path = datasets_root_path_ + "/testFlickrData/flickr30k/flickr30k-images";
  std::string file_path = datasets_root_path_ + "/testFlickrData/flickr30k/test1.token";
  // Create a Flickr30k Dataset with non-existing dataset dir
  std::shared_ptr<Dataset> ds0 = Flickr("NotExistFile", file_path);
  // Fatal check (here and below): the dataset pointer is dereferenced by CreateIterator().
  ASSERT_NE(ds0, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter0 = ds0->CreateIterator();
  // Expect failure: invalid Flickr30k input
  EXPECT_EQ(iter0, nullptr);

  // Create a Flickr30k Dataset with non-existing annotation file
  std::shared_ptr<Dataset> ds1 = Flickr(dataset_path, "NotExistFile");
  ASSERT_NE(ds1, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
  // Expect failure: invalid Flickr30k input
  EXPECT_EQ(iter1, nullptr);

  // Create a Flickr30k Dataset with invalid string of dataset dir
  std::shared_ptr<Dataset> ds2 = Flickr(":*?\"<>|`&;'", file_path);
  ASSERT_NE(ds2, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
  // Expect failure: invalid Flickr30k input
  EXPECT_EQ(iter2, nullptr);

  // Create a Flickr30k Dataset with invalid string of annotation file
  std::shared_ptr<Dataset> ds3 = Flickr(dataset_path, ":*?\"<>|`&;'");
  ASSERT_NE(ds3, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter3 = ds3->CreateIterator();
  // Expect failure: invalid Flickr30k input
  EXPECT_EQ(iter3, nullptr);
}
294
// Null-sampler check: passing nullptr as the sampler is expected to still yield a
// dataset object, but CreateIterator() is expected to return nullptr.
TEST_F(MindDataTestPipeline, TestFlickrWithNullSamplerError) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestFlickrWithNullSamplerError.";

  std::string dataset_path = datasets_root_path_ + "/testFlickrData/flickr30k/flickr30k-images";
  std::string file_path = datasets_root_path_ + "/testFlickrData/flickr30k/test1.token";
  // Create a Flickr30k Dataset
  std::shared_ptr<Dataset> ds = Flickr(dataset_path, file_path, false, nullptr);
  // Fatal check: ds is dereferenced right below.
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Expect failure: invalid Flickr30k input, sampler cannot be nullptr
  EXPECT_EQ(iter, nullptr);
}