• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2019-2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "minddata/dataset/core/client.h"
18 #include "common/common.h"
19 #include "gtest/gtest.h"
20 #include <memory>
21 #include <vector>
22 #include <iostream>
23 #include "minddata/dataset/core/tensor_shape.h"
24 #include "minddata/dataset/engine/datasetops/source/random_data_op.h"
25 #include "minddata/dataset/engine/data_schema.h"
26 #include "minddata/dataset/util/random.h"
27 
28 using namespace mindspore::dataset;
29 using mindspore::LogStream;
30 using mindspore::ExceptionType::NoExceptionType;
31 using mindspore::MsLogLevel::INFO;
32 
33 class MindDataTestRandomDataOp : public UT::DatasetOpTesting {};
34 
35 // Test info:
36 // - Simple test with a user-provided schema generated purely from DataSchema C API
37 // - has an iteration loop
38 //
39 // Tree:  single node tree with RandomDataOp
40 //
41 //    RandomDataOp
42 //
43 TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic1) {
44   Status rc;
45   int32_t rank = 0;  // not used
46   MS_LOG(INFO) << "UT test RandomDataOpBasic1";
47 
48   // Start with an empty execution tree
49   auto myTree = std::make_shared<ExecutionTree>();
50 
51   // Create a schema using the C api's
52   std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>();
53 
54   // RandomDataOp can randomly fill in unknown dimension lengths of a shape.
55   // Most other ops cannot do that as they are limited by the physical data itself. We're
56   // more flexible with random data since it is just making stuff up on the fly.
57   TensorShape c1Shape({TensorShape::kDimUnknown, TensorShape::kDimUnknown, 3});
58   ColDescriptor c1("image", DataType(DataType::DE_INT8), TensorImpl::kFlexible,
59                    rank,  // not used
60                    &c1Shape);
61 
62   // Column 2 will just be a scalar label number
63   TensorShape c2Shape({});  // empty shape is a 1-value scalar Tensor
64   ColDescriptor c2("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, rank, &c2Shape);
65 
66   testSchema->AddColumn(c1);
67   testSchema->AddColumn(c2);
68   std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
69   auto op_connector_size = cfg->op_connector_size();
70 
71   std::shared_ptr<RandomDataOp> myRandomDataOp =
72     std::make_shared<RandomDataOp>(1, op_connector_size, 25, std::move(testSchema));
73 
74   rc = myTree->AssociateNode(myRandomDataOp);
75   EXPECT_TRUE(rc.IsOk());
76 
77   rc = myTree->AssignRoot(myRandomDataOp);
78   EXPECT_TRUE(rc.IsOk());
79 
80   std::ostringstream ss;
81   ss << *myRandomDataOp;
82   MS_LOG(INFO) << "RandomDataOp print: %s" << ss.str();
83 
84   MS_LOG(INFO) << "Launching tree and begin iteration";
85   rc = myTree->Prepare();
86   EXPECT_TRUE(rc.IsOk());
87   rc = myTree->Launch();
88   EXPECT_TRUE(rc.IsOk());
89 
90   // Start the loop of reading tensors from our pipeline
91   DatasetIterator dI(myTree);
92   TensorRow tensorList;
93   rc = dI.FetchNextTensorRow(&tensorList);
94   EXPECT_TRUE(rc.IsOk());
95   int rowCount = 0;
96   while (!tensorList.empty()) {
97     // Don't display these rows...too big to show
98     MS_LOG(INFO) << "Row fetched #: " << rowCount;
99 
100     rc = dI.FetchNextTensorRow(&tensorList);
101     EXPECT_TRUE(rc.IsOk());
102     rowCount++;
103   }
104   ASSERT_EQ(rowCount, 25);
105 }
106 
107 // Test info:
108 // - Simple test with a randomly generated schema
109 // - no iteration loop on this one, just create the op
110 //
111 // Tree:  single node tree with RandomDataOp
112 //
113 //    RandomDataOp
114 //
115 TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic2) {
116   Status rc;
117   MS_LOG(INFO) << "UT test RandomDataOpBasic2";
118 
119   // Start with an empty execution tree
120   auto myTree = std::make_shared<ExecutionTree>();
121 
122   std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
123   auto op_connector_size = cfg->op_connector_size();
124 
125   std::shared_ptr<RandomDataOp> myRandomDataOp = std::make_shared<RandomDataOp>(1, op_connector_size, 0, nullptr);
126 
127   rc = myTree->AssociateNode(myRandomDataOp);
128   EXPECT_TRUE(rc.IsOk());
129 
130   rc = myTree->AssignRoot(myRandomDataOp);
131   EXPECT_TRUE(rc.IsOk());
132 
133   std::ostringstream ss;
134   ss << *myRandomDataOp;
135   MS_LOG(INFO) << "RandomDataOp print: " << ss.str();
136 }
137 
138 // Test info:
139 // - json file test with iteration
140 //
141 // Tree:  single node tree with RandomDataOp
142 //
143 //    RandomDataOp
144 //
145 TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic3) {
146   Status rc;
147   MS_LOG(INFO) << "UT test RandomDataOpBasic3";
148 
149   // Start with an empty execution tree
150   auto myTree = std::make_shared<ExecutionTree>();
151 
152   std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>();
153   rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema.json", {});
154   EXPECT_TRUE(rc.IsOk());
155   std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
156   auto op_connector_size = cfg->op_connector_size();
157 
158   std::shared_ptr<RandomDataOp> myRandomDataOp =
159     std::make_shared<RandomDataOp>(1, op_connector_size, 10, std::move(testSchema));
160 
161   rc = myTree->AssociateNode(myRandomDataOp);
162   EXPECT_TRUE(rc.IsOk());
163 
164   rc = myTree->AssignRoot(myRandomDataOp);
165   EXPECT_TRUE(rc.IsOk());
166 
167   std::ostringstream ss;
168   ss << *myRandomDataOp;
169   MS_LOG(INFO) << "RandomDataOp print: " << ss.str();
170 
171   MS_LOG(INFO) << "Launching tree and begin iteration";
172   rc = myTree->Prepare();
173   EXPECT_TRUE(rc.IsOk());
174   rc = myTree->Launch();
175   EXPECT_TRUE(rc.IsOk());
176 
177   // Start the loop of reading tensors from our pipeline
178   DatasetIterator dI(myTree);
179   TensorRow tensorList;
180   rc = dI.FetchNextTensorRow(&tensorList);
181   EXPECT_TRUE(rc.IsOk());
182   int rowCount = 0;
183   while (!tensorList.empty()) {
184     // Don't display these rows...too big to show
185     MS_LOG(INFO) << "Row fetched #: " << rowCount;
186 
187     rc = dI.FetchNextTensorRow(&tensorList);
188     EXPECT_TRUE(rc.IsOk());
189     rowCount++;
190   }
191   ASSERT_EQ(rowCount, 10);
192 }
193 
194 // Test info:
195 // - json schema input; it's a fairly simple one
196 // - has an iteration loop
197 //
198 // Tree:  RepeatOp over RandomDataOp
199 //
200 //     RepeatOp
201 //        |
202 //    RandomDataOp
203 //
204 TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic4) {
205   Status rc;
206   MS_LOG(INFO) << "UT test RandomDataOpBasic4";
207 
208   // Start with an empty execution tree
209   auto myTree = std::make_shared<ExecutionTree>();
210 
211   std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>();
212   rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema2.json", {});
213   EXPECT_TRUE(rc.IsOk());
214   std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
215   auto op_connector_size = cfg->op_connector_size();
216 
217   std::shared_ptr<RandomDataOp> myRandomDataOp =
218     std::make_shared<RandomDataOp>(1, op_connector_size, 10, std::move(testSchema));
219 
220   rc = myTree->AssociateNode(myRandomDataOp);
221   EXPECT_TRUE(rc.IsOk());
222 
223   uint32_t numRepeats = 2;
224   std::shared_ptr<RepeatOp> myRepeatOp = std::make_shared<RepeatOp>(numRepeats);
225   rc = myTree->AssociateNode(myRepeatOp);
226   EXPECT_TRUE(rc.IsOk());
227 
228   myRandomDataOp->SetTotalRepeats(numRepeats);
229   myRandomDataOp->SetNumRepeatsPerEpoch(numRepeats);
230   rc = myRepeatOp->AddChild(myRandomDataOp);
231   EXPECT_TRUE(rc.IsOk());
232 
233   rc = myTree->AssignRoot(myRepeatOp);
234   EXPECT_TRUE(rc.IsOk());
235 
236   MS_LOG(INFO) << "Launching tree and begin iteration";
237   rc = myTree->Prepare();
238   EXPECT_TRUE(rc.IsOk());
239   rc = myTree->Launch();
240   EXPECT_TRUE(rc.IsOk());
241 
242   // Start the loop of reading tensors from our pipeline
243   DatasetIterator dI(myTree);
244   TensorRow tensorList;
245   rc = dI.FetchNextTensorRow(&tensorList);
246   EXPECT_TRUE(rc.IsOk());
247   int rowCount = 0;
248   while (!tensorList.empty()) {
249     MS_LOG(INFO) << "Row display for row #: " << rowCount;
250 
251     // Display the tensor by calling the printer on it
252     for (int i = 0; i < tensorList.size(); i++) {
253       std::ostringstream ss;
254       ss << *tensorList[i] << std::endl;
255       MS_LOG(INFO) << "Tensor print: %s" << ss.str();
256     }
257 
258     rc = dI.FetchNextTensorRow(&tensorList);
259     EXPECT_TRUE(rc.IsOk());
260     rowCount++;
261   }
262   ASSERT_EQ(rowCount, 20);
263 }
264 
265 // Test info:
266 // - json schema input; it's a fairly simple one
267 // - has an iteration loop
268 // - same as MindDataTestRandomDataOpBasic4 except that this one will have parallel workers
269 //
270 // Tree:  RepeatOp over RandomDataOp
271 //
272 //     RepeatOp
273 //        |
274 //    RandomDataOp
275 //
276 TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic5) {
277   Status rc;
278   MS_LOG(INFO) << "UT test RandomDataOpBasic5";
279 
280   // Start with an empty execution tree
281   auto myTree = std::make_shared<ExecutionTree>();
282 
283   std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>();
284   rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema2.json", {});
285   EXPECT_TRUE(rc.IsOk());
286   std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
287   auto op_connector_size = cfg->op_connector_size();
288 
289   std::shared_ptr<RandomDataOp> myRandomDataOp =
290     std::make_shared<RandomDataOp>(4, op_connector_size, 10, std::move(testSchema));
291 
292   rc = myTree->AssociateNode(myRandomDataOp);
293   EXPECT_TRUE(rc.IsOk());
294 
295   uint32_t numRepeats = 3;
296   std::shared_ptr<RepeatOp> myRepeatOp = std::make_shared<RepeatOp>(numRepeats);
297   rc = myTree->AssociateNode(myRepeatOp);
298   EXPECT_TRUE(rc.IsOk());
299 
300   myRandomDataOp->SetTotalRepeats(numRepeats);
301   myRandomDataOp->SetNumRepeatsPerEpoch(numRepeats);
302   rc = myRepeatOp->AddChild(myRandomDataOp);
303   EXPECT_TRUE(rc.IsOk());
304 
305   rc = myTree->AssignRoot(myRepeatOp);
306   EXPECT_TRUE(rc.IsOk());
307 
308   MS_LOG(INFO) << "Launching tree and begin iteration";
309   rc = myTree->Prepare();
310   EXPECT_TRUE(rc.IsOk());
311   rc = myTree->Launch();
312   EXPECT_TRUE(rc.IsOk());
313 
314   // Start the loop of reading tensors from our pipeline
315   DatasetIterator dI(myTree);
316   TensorRow tensorList;
317   rc = dI.FetchNextTensorRow(&tensorList);
318   EXPECT_TRUE(rc.IsOk());
319   int rowCount = 0;
320   while (!tensorList.empty()) {
321     MS_LOG(INFO) << "Row display for row #: " << rowCount;
322 
323     // Display the tensor by calling the printer on it
324     for (int i = 0; i < tensorList.size(); i++) {
325       std::ostringstream ss;
326       ss << *tensorList[i] << std::endl;
327       MS_LOG(INFO) << "Tensor print: ", ss.str();
328     }
329 
330     rc = dI.FetchNextTensorRow(&tensorList);
331     EXPECT_TRUE(rc.IsOk());
332     rowCount++;
333   }
334   ASSERT_EQ(rowCount, 30);
335 }
336 
337 // Test info:
338 // - repeat shuffle random
339 //
340 // Tree:  RepeatOp over ShuffleOp over RandomDataOp
341 //
342 //     RepeatOp
343 //        |
344 //     ShuffleOp
345 //        |
346 //    RandomDataOp
347 //
348 TEST_F(MindDataTestRandomDataOp, RandomDataOpTree1) {
349   Status rc;
350   MS_LOG(INFO) << "UT test RandomDataOpTree1";
351 
352   // Start with an empty execution tree
353   auto myTree = std::make_shared<ExecutionTree>();
354 
355   std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>();
356   rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema2.json", {});
357   EXPECT_TRUE(rc.IsOk());
358   std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
359   auto op_connector_size = cfg->op_connector_size();
360 
361   std::shared_ptr<RandomDataOp> myRandomDataOp =
362     std::make_shared<RandomDataOp>(4, op_connector_size, 10, std::move(testSchema));
363 
364   rc = myTree->AssociateNode(myRandomDataOp);
365   EXPECT_TRUE(rc.IsOk());
366   uint32_t shuffle_seed = GetSeed();
367   std::shared_ptr<ShuffleOp> myShuffleOp = std::make_shared<ShuffleOp>(4, shuffle_seed, op_connector_size, true);
368 
369   rc = myTree->AssociateNode(myShuffleOp);
370   EXPECT_TRUE(rc.IsOk());
371 
372   uint32_t numRepeats = 3;
373   std::shared_ptr<RepeatOp> myRepeatOp = std::make_shared<RepeatOp>(numRepeats);
374   rc = myTree->AssociateNode(myRepeatOp);
375   EXPECT_TRUE(rc.IsOk());
376 
377   myShuffleOp->SetTotalRepeats(numRepeats);
378   myShuffleOp->SetNumRepeatsPerEpoch(numRepeats);
379   rc = myRepeatOp->AddChild(myShuffleOp);
380   EXPECT_TRUE(rc.IsOk());
381 
382   myRandomDataOp->SetTotalRepeats(numRepeats);
383   myRandomDataOp->SetNumRepeatsPerEpoch(numRepeats);
384   rc = myShuffleOp->AddChild(myRandomDataOp);
385   EXPECT_TRUE(rc.IsOk());
386 
387   rc = myTree->AssignRoot(myRepeatOp);
388   EXPECT_TRUE(rc.IsOk());
389 
390   MS_LOG(INFO) << "Launching tree and begin iteration";
391   rc = myTree->Prepare();
392   EXPECT_TRUE(rc.IsOk());
393   rc = myTree->Launch();
394   EXPECT_TRUE(rc.IsOk());
395 
396   // Start the loop of reading tensors from our pipeline
397   DatasetIterator dI(myTree);
398   TensorRow tensorList;
399   rc = dI.FetchNextTensorRow(&tensorList);
400   EXPECT_TRUE(rc.IsOk());
401   int rowCount = 0;
402   while (!tensorList.empty()) {
403     MS_LOG(INFO) << "Row display for row #: " << rowCount;
404 
405     // Display the tensor by calling the printer on it
406     for (int i = 0; i < tensorList.size(); i++) {
407       std::ostringstream ss;
408       ss << *tensorList[i] << std::endl;
409       MS_LOG(INFO) << "Tensor print: " << ss.str();
410     }
411 
412     rc = dI.FetchNextTensorRow(&tensorList);
413     EXPECT_TRUE(rc.IsOk());
414     rowCount++;
415   }
416   ASSERT_EQ(rowCount, 30);
417 }
418