1 /**
2 * Copyright 2019-2021 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include <memory>
17 #include <string>
18 #include "minddata/dataset/core/client.h"
19 // #include "minddata/dataset/core/pybind_support.h"
20 // #include "minddata/dataset/core/tensor.h"
21 // #include "minddata/dataset/core/tensor_shape.h"
22 // #include "minddata/dataset/engine/datasetops/batch_op.h"
23 #include "minddata/dataset/engine/datasetops/source/tf_reader_op.h"
24 #include "common/common.h"
25 #include "gtest/gtest.h"
26 #include "utils/log_adapter.h"
27 #include "securec.h"
28 #include "minddata/dataset/util/status.h"
29 // #include "pybind11/numpy.h"
30 // #include "pybind11/pybind11.h"
31
32 // #include "utils/ms_utils.h"
33
34 // #include "minddata/dataset/engine/db_connector.h"
35 // #include "minddata/dataset/kernels/data/data_utils.h"
36
37 namespace common = mindspore::common;
38 namespace de = mindspore::dataset;
39
40 using namespace mindspore::dataset;
41 using mindspore::LogStream;
42 using mindspore::ExceptionType::NoExceptionType;
43 using mindspore::MsLogLevel::ERROR;
44
45 class MindDataTestBatchOp : public UT::DatasetOpTesting {
46 protected:
47 };
48
// Runs TFReader -> Batch(12) and expects exactly one batch whose "col_sint64" column matches a
// {12, 1} int64 tensor built from the expected values, followed by an empty map on the next fetch.
TEST_F(MindDataTestBatchOp, TestSimpleBatch) {
  std::string data_path = datasets_root_path_ + "/testBatchDataset/test.data";
  bool reached_end = false;
  std::shared_ptr<de::BatchOp> batch_op = Batch(12);
  EXPECT_EQ(batch_op->Name(), "BatchOp");

  auto tree = Build({TFReader(data_path), batch_op});
  tree->Prepare();
  Status status = tree->Launch();
  if (status.IsError()) {
    // reached_end stays false, so the final expectation fails the test.
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << status.ToString() << ".";
  } else {
    int64_t expected_values[] = {
      -9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807};
    de::DatasetIterator iterator(tree);
    TensorMap batch;
    status = iterator.GetNextAsMap(&batch);
    EXPECT_TRUE(status.IsOk());

    std::shared_ptr<de::Tensor> expected;
    status = de::Tensor::CreateFromMemory(de::TensorShape({12, 1}), de::DataType(DataType::DE_INT64),
                                          reinterpret_cast<unsigned char *>(expected_values), &expected);
    EXPECT_TRUE(status.IsOk());

    // The batched int64 column must hold exactly the expected values.
    EXPECT_TRUE(*expected == *batch["col_sint64"]);
    // A different column of the same batch must not compare equal to the expected tensor.
    EXPECT_FALSE(*expected == *batch["col_sint16"]);

    // The next fetch should succeed but return an empty map.
    status = iterator.GetNextAsMap(&batch);
    EXPECT_TRUE(status.IsOk());
    reached_end = (batch.size() == 0);
  }
  EXPECT_TRUE(reached_end);
}
82
// Runs TFReader -> Repeat(2) -> Batch(7, drop=true) and expects three full batches of 7 rows in
// "col_sint64", followed by an empty map on the next fetch.
TEST_F(MindDataTestBatchOp, TestRepeatBatchDropTrue) {
  std::string data_path = datasets_root_path_ + "/testBatchDataset/test.data";
  bool reached_end = false;
  auto reader_op = TFReader(data_path);
  auto repeat_op = Repeat(2);
  auto batch_op = Batch(7, true);
  reader_op->SetTotalRepeats(2);
  reader_op->SetNumRepeatsPerEpoch(2);
  auto tree = Build({reader_op, repeat_op, batch_op});
  tree->Prepare();
  Status status = tree->Launch();
  if (status.IsError()) {
    // reached_end stays false, so the final expectation fails the test.
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << status.ToString() << ".";
  } else {
    // Two back-to-back copies of the same 12 values.
    int64_t expected_values[] = {
      -9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807,
      -9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807};
    de::DatasetIterator iterator(tree);

    // Builds a {7, 1} int64 tensor from the expected values starting at `offset`.
    auto make_batch = [&expected_values](int offset) {
      std::shared_ptr<de::Tensor> tensor;
      Status create_status =
        de::Tensor::CreateFromMemory(de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
                                     reinterpret_cast<unsigned char *>(expected_values + offset), &tensor);
      EXPECT_TRUE(create_status.IsOk());
      return tensor;
    };
    std::shared_ptr<de::Tensor> first = make_batch(0);
    std::shared_ptr<de::Tensor> second = make_batch(7);
    // The values repeat every 12 entries, so offset 2 holds the same data as offset 14.
    std::shared_ptr<de::Tensor> third = make_batch(2);

    TensorMap batch;
    // Fetches the next batch and compares its "col_sint64" column with `expected`.
    auto expect_next = [&iterator, &batch](const std::shared_ptr<de::Tensor> &expected) {
      Status fetch_status = iterator.GetNextAsMap(&batch);
      EXPECT_TRUE(fetch_status.IsOk());
      EXPECT_TRUE(*expected == *(batch["col_sint64"]));
    };
    expect_next(first);   // first call to getNext()
    expect_next(second);  // second call to getNext()
    expect_next(third);   // third call to getNext()

    // The next fetch should succeed but return an empty map.
    status = iterator.GetNextAsMap(&batch);
    EXPECT_TRUE(status.IsOk());
    reached_end = (batch.size() == 0);
  }
  EXPECT_TRUE(reached_end);
}
132
// Runs TFReader -> Repeat(2) -> Batch(7, drop=false) and expects three batches of 7 rows plus a
// final partial batch of 3 rows in "col_sint64", followed by an empty map on the next fetch.
TEST_F(MindDataTestBatchOp, TestRepeatBatchDropFalse) {
  std::string data_path = datasets_root_path_ + "/testBatchDataset/test.data";
  bool reached_end = false;
  auto reader_op = TFReader(data_path);
  auto repeat_op = Repeat(2);
  auto batch_op = Batch(7, false);
  reader_op->SetTotalRepeats(2);
  reader_op->SetNumRepeatsPerEpoch(2);
  auto tree = Build({reader_op, repeat_op, batch_op});
  tree->Prepare();
  Status status = tree->Launch();
  if (status.IsError()) {
    // reached_end stays false, so the final expectation fails the test.
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << status.ToString() << ".";
  } else {
    // Two back-to-back copies of the same 12 values.
    int64_t expected_values[] = {
      -9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807,
      -9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807};
    de::DatasetIterator iterator(tree);

    // Builds an int64 tensor of the given shape from the expected values starting at `offset`.
    auto make_batch = [&expected_values](const de::TensorShape &shape, int offset) {
      std::shared_ptr<de::Tensor> tensor;
      Status create_status =
        de::Tensor::CreateFromMemory(shape, de::DataType(DataType::DE_INT64),
                                     reinterpret_cast<unsigned char *>(expected_values + offset), &tensor);
      EXPECT_TRUE(create_status.IsOk());
      return tensor;
    };
    std::shared_ptr<de::Tensor> first = make_batch(de::TensorShape({7, 1}), 0);
    std::shared_ptr<de::Tensor> second = make_batch(de::TensorShape({7, 1}), 7);
    // The values repeat every 12 entries, so offsets 2 and 9 hold the same data as 14 and 21.
    std::shared_ptr<de::Tensor> third = make_batch(de::TensorShape({7, 1}), 2);
    std::shared_ptr<de::Tensor> remainder = make_batch(de::TensorShape({3, 1}), 9);

    TensorMap batch;
    // Fetches the next batch and compares its "col_sint64" column with `expected`.
    auto expect_next = [&iterator, &batch](const std::shared_ptr<de::Tensor> &expected) {
      Status fetch_status = iterator.GetNextAsMap(&batch);
      EXPECT_TRUE(fetch_status.IsOk());
      EXPECT_TRUE(*expected == *(batch["col_sint64"]));
    };
    expect_next(first);      // first call to getNext()
    expect_next(second);     // second call to getNext()
    expect_next(third);      // third call to getNext()
    expect_next(remainder);  // last call to getNext()

    // The next fetch should succeed but return an empty map.
    status = iterator.GetNextAsMap(&batch);
    EXPECT_TRUE(status.IsOk());
    reached_end = (batch.size() == 0);
  }
  EXPECT_TRUE(reached_end);
}
189
// Runs TFReader -> Batch(7, drop=false) -> Repeat(2) and expects the batch sequence 7 rows,
// 5 rows, 7 rows, 5 rows in "col_sint64", followed by an empty map on the next fetch.
TEST_F(MindDataTestBatchOp, TestBatchDropFalseRepeat) {
  std::string data_path = datasets_root_path_ + "/testBatchDataset/test.data";
  bool reached_end = false;
  auto reader_op = TFReader(data_path);
  auto batch_op = Batch(7, false);
  auto repeat_op = Repeat(2);
  reader_op->SetTotalRepeats(2);
  reader_op->SetNumRepeatsPerEpoch(2);
  batch_op->SetTotalRepeats(2);
  batch_op->SetNumRepeatsPerEpoch(2);
  auto tree = Build({reader_op, batch_op, repeat_op});
  tree->Prepare();
  Status status = tree->Launch();
  if (status.IsError()) {
    // reached_end stays false, so the final expectation fails the test.
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << status.ToString() << ".";
  } else {
    int64_t expected_values[] = {
      -9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807,
      -9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807};
    de::DatasetIterator iterator(tree);

    // Full batch: the first 7 values.
    std::shared_ptr<de::Tensor> full_batch;
    status = de::Tensor::CreateFromMemory(de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
                                          reinterpret_cast<unsigned char *>(expected_values), &full_batch);
    EXPECT_TRUE(status.IsOk());
    // Partial batch: the 5 values starting at offset 7.
    std::shared_ptr<de::Tensor> partial_batch;
    status = de::Tensor::CreateFromMemory(de::TensorShape({5, 1}), de::DataType(DataType::DE_INT64),
                                          reinterpret_cast<unsigned char *>(expected_values + 7), &partial_batch);
    EXPECT_TRUE(status.IsOk());

    TensorMap batch;
    // Four fetches in total: the full/partial pair is expected twice.
    for (const std::shared_ptr<de::Tensor> &expected : {full_batch, partial_batch, full_batch, partial_batch}) {
      status = iterator.GetNextAsMap(&batch);
      EXPECT_TRUE(status.IsOk());
      EXPECT_TRUE(*expected == *(batch["col_sint64"]));
    }

    // The next fetch should succeed but return an empty map.
    status = iterator.GetNextAsMap(&batch);
    EXPECT_TRUE(status.IsOk());
    reached_end = (batch.size() == 0);
  }
  EXPECT_TRUE(reached_end);
}
242
// Runs TFReader -> Batch(5, drop=true) -> Repeat(2) and expects two full batches of 5 rows,
// delivered twice, in "col_sint64", followed by an empty map on the next fetch.
TEST_F(MindDataTestBatchOp, TestBatchDropTrueRepeat) {
  std::string data_path = datasets_root_path_ + "/testBatchDataset/test.data";
  bool reached_end = false;
  auto reader_op = TFReader(data_path);
  auto batch_op = Batch(5, true);
  auto repeat_op = Repeat(2);
  reader_op->SetTotalRepeats(2);
  reader_op->SetNumRepeatsPerEpoch(2);
  batch_op->SetTotalRepeats(2);
  batch_op->SetNumRepeatsPerEpoch(2);
  auto tree = Build({reader_op, batch_op, repeat_op});
  tree->Prepare();
  Status status = tree->Launch();
  if (status.IsError()) {
    // reached_end stays false, so the final expectation fails the test.
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << status.ToString() << ".";
  } else {
    int64_t expected_values[] = {
      -9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807,
      -9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807};
    de::DatasetIterator iterator(tree);

    // First batch: the 5 values starting at offset 0.
    std::shared_ptr<de::Tensor> first_batch;
    status = de::Tensor::CreateFromMemory(de::TensorShape({5, 1}), de::DataType(DataType::DE_INT64),
                                          reinterpret_cast<unsigned char *>(expected_values), &first_batch);
    EXPECT_TRUE(status.IsOk());
    // Second batch: the 5 values starting at offset 5.
    std::shared_ptr<de::Tensor> second_batch;
    status = de::Tensor::CreateFromMemory(de::TensorShape({5, 1}), de::DataType(DataType::DE_INT64),
                                          reinterpret_cast<unsigned char *>(expected_values + 5), &second_batch);
    EXPECT_TRUE(status.IsOk());

    TensorMap batch;
    // Four fetches in total: the pair of full batches is expected twice.
    for (const std::shared_ptr<de::Tensor> &expected : {first_batch, second_batch, first_batch, second_batch}) {
      status = iterator.GetNextAsMap(&batch);
      EXPECT_TRUE(status.IsOk());
      EXPECT_TRUE(*expected == *(batch["col_sint64"]));
    }

    // The next fetch should succeed but return an empty map.
    status = iterator.GetNextAsMap(&batch);
    EXPECT_TRUE(status.IsOk());
    reached_end = (batch.size() == 0);
  }
  EXPECT_TRUE(reached_end);
}
295
// Runs TFReader -> Batch(12, drop=false, pad_info) where "col_1d" is padded to shape {4} with a
// pad value of -1, and expects a single batch whose "col_1d" column equals the {12, 4} int64
// tensor below, followed by an empty map on the next fetch.
//
// Unlike a plain log-and-continue, a failed tree launch fails the test (via `success`), and every
// Status produced along the way is checked before its result is used.
TEST_F(MindDataTestBatchOp, TestSimpleBatchPadding) {
  std::string schema_file = datasets_root_path_ + "/testBatchDataset/test.data";

  // Scalar pad value of -1 used to fill "col_1d" up to 4 elements per row.
  PadInfo m;
  std::shared_ptr<Tensor> pad_value;
  Status rc = Tensor::CreateEmpty(TensorShape::CreateScalar(), DataType(DataType::DE_FLOAT32), &pad_value);
  ASSERT_TRUE(rc.IsOk());
  ASSERT_TRUE(pad_value != nullptr);
  rc = pad_value->SetItemAt<float>({}, -1);
  ASSERT_TRUE(rc.IsOk());
  m.insert({"col_1d", std::make_pair(TensorShape({4}), pad_value)});

  // Expected batch: 12 rows, each holding two data values followed by two pad values.
  int64_t payload[] = {-9223372036854775807 - 1, 1,  -1, -1,
                       2,                        3,  -1, -1,
                       4,                        5,  -1, -1,
                       6,                        7,  -1, -1,
                       8,                        9,  -1, -1,
                       10,                       11, -1, -1,
                       12,                       13, -1, -1,
                       14,                       15, -1, -1,
                       16,                       17, -1, -1,
                       18,                       19, -1, -1,
                       20,                       21, -1, -1,
                       22,                       23, -1, -1};
  // Built before the tree is launched so that a failure here can abort the test safely.
  std::shared_ptr<de::Tensor> t;
  rc = de::Tensor::CreateFromMemory(de::TensorShape({12, 4}), de::DataType(DataType::DE_INT64),
                                    reinterpret_cast<unsigned char *>(payload), &t);
  ASSERT_TRUE(rc.IsOk());
  ASSERT_TRUE(t != nullptr);

  int32_t batch_size = 12;
  bool drop = false;
  std::shared_ptr<BatchOp> op = Batch(batch_size, drop, m);
  auto tree = Build({TFReader(schema_file), op});
  tree->Prepare();

  bool success = false;
  rc = tree->Launch();
  if (rc.IsError()) {
    // success stays false, so the final expectation fails the test.
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << ".";
  } else {
    de::DatasetIterator di(tree);
    TensorMap tensor_map;
    rc = di.GetNextAsMap(&tensor_map);
    EXPECT_TRUE(rc.IsOk());

    // Guard the dereference: a missing column must fail the test, not crash it.
    auto actual = tensor_map["col_1d"];
    EXPECT_TRUE(actual != nullptr);
    if (actual != nullptr) {
      EXPECT_TRUE((*t) == (*actual));
    }

    // The next fetch should succeed but return an empty map.
    rc = di.GetNextAsMap(&tensor_map);
    EXPECT_TRUE(rc.IsOk());
    EXPECT_TRUE(tensor_map.size() == 0);
    success = rc.IsOk() && (tensor_map.size() == 0);
  }
  EXPECT_EQ(success, true);
}
383