1 /**
2 * Copyright 2019-2021 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "minddata/dataset/core/client.h"
18 #include "common/common.h"
19 #include "gtest/gtest.h"
20 #include <memory>
21 #include <vector>
22 #include <iostream>
23 #include "minddata/dataset/core/tensor_shape.h"
24 #include "minddata/dataset/engine/datasetops/source/random_data_op.h"
25 #include "minddata/dataset/engine/data_schema.h"
26 #include "minddata/dataset/util/random.h"
27
28 using namespace mindspore::dataset;
29 using mindspore::LogStream;
30 using mindspore::ExceptionType::NoExceptionType;
31 using mindspore::MsLogLevel::INFO;
32
33 class MindDataTestRandomDataOp : public UT::DatasetOpTesting {};
34
35 // Test info:
36 // - Simple test with a user-provided schema generated purely from DataSchema C API
// - has an iteration loop
38 //
39 // Tree: single node tree with RandomDataOp
40 //
41 // RandomDataOp
42 //
// Runs a lone RandomDataOp over a schema assembled in code (an int8 "image" column whose first
// two dimensions are unknown, plus a scalar uint32 "label" column) and expects the iterator to
// hand back exactly 25 rows.
TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic1) {
  Status rc;
  int32_t rank = 0;  // not used
  MS_LOG(INFO) << "UT test RandomDataOpBasic1";

  // Start with an empty execution tree
  auto myTree = std::make_shared<ExecutionTree>();

  // Create a schema using the C api's
  std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>();

  // RandomDataOp can randomly fill in unknown dimension lengths of a shape.
  // Most other ops cannot do that as they are limited by the physical data itself. We're
  // more flexible with random data since it is just making stuff up on the fly.
  TensorShape c1Shape({TensorShape::kDimUnknown, TensorShape::kDimUnknown, 3});
  ColDescriptor c1("image", DataType(DataType::DE_INT8), TensorImpl::kFlexible,
                   rank,  // not used
                   &c1Shape);

  // Column 2 will just be a scalar label number
  TensorShape c2Shape({});  // empty shape is a 1-value scalar Tensor
  ColDescriptor c2("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, rank, &c2Shape);

  testSchema->AddColumn(c1);
  testSchema->AddColumn(c2);
  std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
  auto op_connector_size = cfg->op_connector_size();

  // The op takes ownership of the schema; 25 is presumably the total row count (it matches the
  // row count asserted at the end of the test).
  std::shared_ptr<RandomDataOp> myRandomDataOp =
    std::make_shared<RandomDataOp>(1, op_connector_size, 25, std::move(testSchema));

  rc = myTree->AssociateNode(myRandomDataOp);
  EXPECT_TRUE(rc.IsOk());

  rc = myTree->AssignRoot(myRandomDataOp);
  EXPECT_TRUE(rc.IsOk());

  // Log the op's textual description. The message is streamed, so it must not carry a
  // printf-style "%s" placeholder (that would be printed literally).
  std::ostringstream ss;
  ss << *myRandomDataOp;
  MS_LOG(INFO) << "RandomDataOp print: " << ss.str();

  // Prepare/Launch are fatal checks: there is nothing meaningful to iterate if they fail.
  MS_LOG(INFO) << "Launching tree and begin iteration";
  rc = myTree->Prepare();
  ASSERT_TRUE(rc.IsOk());
  rc = myTree->Launch();
  ASSERT_TRUE(rc.IsOk());

  // Start the loop of reading tensors from our pipeline
  DatasetIterator dI(myTree);
  TensorRow tensorList;
  rc = dI.FetchNextTensorRow(&tensorList);
  ASSERT_TRUE(rc.IsOk());
  int rowCount = 0;
  while (!tensorList.empty()) {
    // Don't display these rows...too big to show
    MS_LOG(INFO) << "Row fetched #: " << rowCount;

    // Abort on a failed fetch instead of continuing to loop on a row that may be stale.
    rc = dI.FetchNextTensorRow(&tensorList);
    ASSERT_TRUE(rc.IsOk());
    rowCount++;
  }
  ASSERT_EQ(rowCount, 25);
}
106
107 // Test info:
108 // - Simple test with a randomly generated schema
109 // - no iteration loop on this one, just create the op
110 //
111 // Tree: single node tree with RandomDataOp
112 //
113 // RandomDataOp
114 //
// Constructs a RandomDataOp without supplying a schema (nullptr), attaches it to a tree as the
// root and logs its printed form. The tree is never prepared or launched here.
TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic2) {
  MS_LOG(INFO) << "UT test RandomDataOpBasic2";

  // The tree starts out empty
  auto tree = std::make_shared<ExecutionTree>();

  // Connector size comes from the global configuration
  auto config = GlobalContext::config_manager();
  auto connector_size = config->op_connector_size();

  // No schema is handed over for this op
  auto random_op = std::make_shared<RandomDataOp>(1, connector_size, 0, nullptr);

  Status status = tree->AssociateNode(random_op);
  EXPECT_TRUE(status.IsOk());

  status = tree->AssignRoot(random_op);
  EXPECT_TRUE(status.IsOk());

  // Render the op through its stream operator and log the result
  std::ostringstream op_text;
  op_text << *random_op;
  MS_LOG(INFO) << "RandomDataOp print: " << op_text.str();
}
137
138 // Test info:
139 // - json file test with iteration
140 //
141 // Tree: single node tree with RandomDataOp
142 //
143 // RandomDataOp
144 //
// Loads the schema from testRandomData/datasetSchema.json, runs a lone RandomDataOp with it
// and expects the iterator to produce exactly 10 rows.
TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic3) {
  MS_LOG(INFO) << "UT test RandomDataOpBasic3";

  // The tree starts out empty
  auto tree = std::make_shared<ExecutionTree>();

  // Schema is read from a json file under the dataset root
  auto schema = std::make_unique<DataSchema>();
  Status status = schema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema.json", {});
  EXPECT_TRUE(status.IsOk());

  auto config = GlobalContext::config_manager();
  auto connector_size = config->op_connector_size();

  // Ownership of the schema moves into the op
  auto random_op = std::make_shared<RandomDataOp>(1, connector_size, 10, std::move(schema));

  status = tree->AssociateNode(random_op);
  EXPECT_TRUE(status.IsOk());

  status = tree->AssignRoot(random_op);
  EXPECT_TRUE(status.IsOk());

  // Render the op through its stream operator and log the result
  std::ostringstream op_text;
  op_text << *random_op;
  MS_LOG(INFO) << "RandomDataOp print: " << op_text.str();

  MS_LOG(INFO) << "Launching tree and begin iteration";
  status = tree->Prepare();
  EXPECT_TRUE(status.IsOk());
  status = tree->Launch();
  EXPECT_TRUE(status.IsOk());

  // Drain the pipeline: an empty row ends the loop
  DatasetIterator row_iter(tree);
  TensorRow row;
  status = row_iter.FetchNextTensorRow(&row);
  EXPECT_TRUE(status.IsOk());
  int rows_seen = 0;
  for (; !row.empty(); ++rows_seen) {
    // Rows are too big to be worth displaying; only the index is logged
    MS_LOG(INFO) << "Row fetched #: " << rows_seen;

    status = row_iter.FetchNextTensorRow(&row);
    EXPECT_TRUE(status.IsOk());
  }
  ASSERT_EQ(rows_seen, 10);
}
193
194 // Test info:
// - json schema input; it's a fairly simple one
// - has an iteration loop
197 //
198 // Tree: RepeatOp over RandomDataOp
199 //
200 // RepeatOp
201 // |
202 // RandomDataOp
203 //
// Puts a RepeatOp (2 repeats) on top of a single-worker RandomDataOp whose schema is loaded
// from testRandomData/datasetSchema2.json, prints every tensor that comes out, and expects
// 20 rows in total.
TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic4) {
  Status rc;
  MS_LOG(INFO) << "UT test RandomDataOpBasic4";

  // Start with an empty execution tree
  auto myTree = std::make_shared<ExecutionTree>();

  std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>();
  rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema2.json", {});
  EXPECT_TRUE(rc.IsOk());
  std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
  auto op_connector_size = cfg->op_connector_size();

  // The op takes ownership of the schema.
  std::shared_ptr<RandomDataOp> myRandomDataOp =
    std::make_shared<RandomDataOp>(1, op_connector_size, 10, std::move(testSchema));

  rc = myTree->AssociateNode(myRandomDataOp);
  EXPECT_TRUE(rc.IsOk());

  uint32_t numRepeats = 2;
  std::shared_ptr<RepeatOp> myRepeatOp = std::make_shared<RepeatOp>(numRepeats);
  rc = myTree->AssociateNode(myRepeatOp);
  EXPECT_TRUE(rc.IsOk());

  // The leaf op is given the same repeat count as the RepeatOp placed above it.
  myRandomDataOp->SetTotalRepeats(numRepeats);
  myRandomDataOp->SetNumRepeatsPerEpoch(numRepeats);
  rc = myRepeatOp->AddChild(myRandomDataOp);
  EXPECT_TRUE(rc.IsOk());

  rc = myTree->AssignRoot(myRepeatOp);
  EXPECT_TRUE(rc.IsOk());

  // Prepare/Launch are fatal checks: there is nothing meaningful to iterate if they fail.
  MS_LOG(INFO) << "Launching tree and begin iteration";
  rc = myTree->Prepare();
  ASSERT_TRUE(rc.IsOk());
  rc = myTree->Launch();
  ASSERT_TRUE(rc.IsOk());

  // Start the loop of reading tensors from our pipeline
  DatasetIterator dI(myTree);
  TensorRow tensorList;
  rc = dI.FetchNextTensorRow(&tensorList);
  ASSERT_TRUE(rc.IsOk());
  int rowCount = 0;
  while (!tensorList.empty()) {
    MS_LOG(INFO) << "Row display for row #: " << rowCount;

    // Display the tensor by calling the printer on it. The index is unsigned so that the
    // comparison against size() does not mix signed and unsigned operands.
    for (size_t i = 0; i < tensorList.size(); ++i) {
      std::ostringstream ss;
      ss << *tensorList[i] << std::endl;
      // Streamed message: no printf-style "%s" placeholder (it would be printed literally).
      MS_LOG(INFO) << "Tensor print: " << ss.str();
    }

    // Abort on a failed fetch instead of continuing to loop on a row that may be stale.
    rc = dI.FetchNextTensorRow(&tensorList);
    ASSERT_TRUE(rc.IsOk());
    rowCount++;
  }
  // Presumably 10 generated rows x 2 repeats.
  ASSERT_EQ(rowCount, 20);
}
264
265 // Test info:
// - json schema input; it's a fairly simple one
// - has an iteration loop
// - same as RandomDataOpBasic4 except that this one will have parallel workers
269 //
270 // Tree: RepeatOp over RandomDataOp
271 //
272 // RepeatOp
273 // |
274 // RandomDataOp
275 //
// Puts a RepeatOp (3 repeats) on top of a RandomDataOp constructed with 4 as its first argument
// (presumably the worker count, making this the parallel variant of RandomDataOpBasic4), prints
// every tensor that comes out, and expects 30 rows in total.
TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic5) {
  Status rc;
  MS_LOG(INFO) << "UT test RandomDataOpBasic5";

  // Start with an empty execution tree
  auto myTree = std::make_shared<ExecutionTree>();

  std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>();
  rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema2.json", {});
  EXPECT_TRUE(rc.IsOk());
  std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
  auto op_connector_size = cfg->op_connector_size();

  // The op takes ownership of the schema.
  std::shared_ptr<RandomDataOp> myRandomDataOp =
    std::make_shared<RandomDataOp>(4, op_connector_size, 10, std::move(testSchema));

  rc = myTree->AssociateNode(myRandomDataOp);
  EXPECT_TRUE(rc.IsOk());

  uint32_t numRepeats = 3;
  std::shared_ptr<RepeatOp> myRepeatOp = std::make_shared<RepeatOp>(numRepeats);
  rc = myTree->AssociateNode(myRepeatOp);
  EXPECT_TRUE(rc.IsOk());

  // The leaf op is given the same repeat count as the RepeatOp placed above it.
  myRandomDataOp->SetTotalRepeats(numRepeats);
  myRandomDataOp->SetNumRepeatsPerEpoch(numRepeats);
  rc = myRepeatOp->AddChild(myRandomDataOp);
  EXPECT_TRUE(rc.IsOk());

  rc = myTree->AssignRoot(myRepeatOp);
  EXPECT_TRUE(rc.IsOk());

  // Prepare/Launch are fatal checks: there is nothing meaningful to iterate if they fail.
  MS_LOG(INFO) << "Launching tree and begin iteration";
  rc = myTree->Prepare();
  ASSERT_TRUE(rc.IsOk());
  rc = myTree->Launch();
  ASSERT_TRUE(rc.IsOk());

  // Start the loop of reading tensors from our pipeline
  DatasetIterator dI(myTree);
  TensorRow tensorList;
  rc = dI.FetchNextTensorRow(&tensorList);
  ASSERT_TRUE(rc.IsOk());
  int rowCount = 0;
  while (!tensorList.empty()) {
    MS_LOG(INFO) << "Row display for row #: " << rowCount;

    // Display the tensor by calling the printer on it. The index is unsigned so that the
    // comparison against size() does not mix signed and unsigned operands.
    for (size_t i = 0; i < tensorList.size(); ++i) {
      std::ostringstream ss;
      ss << *tensorList[i] << std::endl;
      // The tensor text must be streamed with <<; separating it with a comma would invoke the
      // comma operator and silently drop it from the log message.
      MS_LOG(INFO) << "Tensor print: " << ss.str();
    }

    // Abort on a failed fetch instead of continuing to loop on a row that may be stale.
    rc = dI.FetchNextTensorRow(&tensorList);
    ASSERT_TRUE(rc.IsOk());
    rowCount++;
  }
  // Presumably 10 generated rows x 3 repeats.
  ASSERT_EQ(rowCount, 30);
}
336
337 // Test info:
338 // - repeat shuffle random
339 //
// Tree: RepeatOp over ShuffleOp over RandomDataOp
341 //
342 // RepeatOp
343 // |
344 // ShuffleOp
345 // |
346 // RandomDataOp
347 //
// Three-level tree: RepeatOp (3 repeats) -> ShuffleOp -> RandomDataOp, with the schema loaded
// from testRandomData/datasetSchema2.json. Every tensor is printed and 30 rows are expected.
TEST_F(MindDataTestRandomDataOp, RandomDataOpTree1) {
  MS_LOG(INFO) << "UT test RandomDataOpTree1";

  // The tree starts out empty
  auto tree = std::make_shared<ExecutionTree>();

  // Schema is read from a json file under the dataset root
  auto schema = std::make_unique<DataSchema>();
  Status status = schema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema2.json", {});
  EXPECT_TRUE(status.IsOk());

  auto config = GlobalContext::config_manager();
  auto connector_size = config->op_connector_size();

  // Leaf: the random data generator, which takes ownership of the schema
  auto random_op = std::make_shared<RandomDataOp>(4, connector_size, 10, std::move(schema));
  status = tree->AssociateNode(random_op);
  EXPECT_TRUE(status.IsOk());

  // Middle: the shuffle op, seeded from GetSeed()
  uint32_t seed = GetSeed();
  auto shuffle_op = std::make_shared<ShuffleOp>(4, seed, connector_size, true);
  status = tree->AssociateNode(shuffle_op);
  EXPECT_TRUE(status.IsOk());

  // Top: the repeat op
  uint32_t repeat_count = 3;
  auto repeat_op = std::make_shared<RepeatOp>(repeat_count);
  status = tree->AssociateNode(repeat_op);
  EXPECT_TRUE(status.IsOk());

  // Wire the ops together top-down; each descendant is given the repeat count of the RepeatOp
  shuffle_op->SetTotalRepeats(repeat_count);
  shuffle_op->SetNumRepeatsPerEpoch(repeat_count);
  status = repeat_op->AddChild(shuffle_op);
  EXPECT_TRUE(status.IsOk());

  random_op->SetTotalRepeats(repeat_count);
  random_op->SetNumRepeatsPerEpoch(repeat_count);
  status = shuffle_op->AddChild(random_op);
  EXPECT_TRUE(status.IsOk());

  status = tree->AssignRoot(repeat_op);
  EXPECT_TRUE(status.IsOk());

  MS_LOG(INFO) << "Launching tree and begin iteration";
  status = tree->Prepare();
  EXPECT_TRUE(status.IsOk());
  status = tree->Launch();
  EXPECT_TRUE(status.IsOk());

  // Drain the pipeline: an empty row ends the loop
  DatasetIterator row_iter(tree);
  TensorRow row;
  status = row_iter.FetchNextTensorRow(&row);
  EXPECT_TRUE(status.IsOk());
  int rows_seen = 0;
  for (; !row.empty(); ++rows_seen) {
    MS_LOG(INFO) << "Row display for row #: " << rows_seen;

    // Print each tensor of the row through its stream operator
    for (size_t col = 0; col < row.size(); ++col) {
      std::ostringstream tensor_text;
      tensor_text << *row[col] << std::endl;
      MS_LOG(INFO) << "Tensor print: " << tensor_text.str();
    }

    status = row_iter.FetchNextTensorRow(&row);
    EXPECT_TRUE(status.IsOk());
  }
  ASSERT_EQ(rows_seen, 30);
}
418