1 /** 2 * Copyright 2019-2021 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "minddata/dataset/core/client.h" 18 #include "common/common.h" 19 #include "gtest/gtest.h" 20 #include <memory> 21 #include <vector> 22 #include <iostream> 23 #include "minddata/dataset/core/tensor_shape.h" 24 #include "minddata/dataset/engine/datasetops/source/random_data_op.h" 25 #include "minddata/dataset/engine/data_schema.h" 26 #include "minddata/dataset/util/random.h" 27 28 using namespace mindspore::dataset; 29 using mindspore::LogStream; 30 using mindspore::ExceptionType::NoExceptionType; 31 using mindspore::MsLogLevel::INFO; 32 33 class MindDataTestRandomDataOp : public UT::DatasetOpTesting {}; 34 35 // Test info: 36 // - Simple test with a user-provided schema generated purely from DataSchema C API 37 // - has an interaction loop 38 // 39 // Tree: single node tree with RandomDataOp 40 // 41 // RandomDataOp 42 // 43 TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic1) { 44 Status rc; 45 int32_t rank = 0; // not used 46 MS_LOG(INFO) << "UT test RandomDataOpBasic1"; 47 48 // Start with an empty execution tree 49 auto myTree = std::make_shared<ExecutionTree>(); 50 51 // Create a schema using the C api's 52 std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>(); 53 54 // RandomDataOp can randomly fill in unknown dimension lengths of a shape. 55 // Most other ops cannot do that as they are limited by the physical data itself. We're 56 // more flexible with random data since it is just making stuff up on the fly. 57 TensorShape c1Shape({TensorShape::kDimUnknown, TensorShape::kDimUnknown, 3}); 58 ColDescriptor c1("image", DataType(DataType::DE_INT8), TensorImpl::kFlexible, 59 rank, // not used 60 &c1Shape); 61 62 // Column 2 will just be a scalar label number 63 TensorShape c2Shape({}); // empty shape is a 1-value scalar Tensor 64 ColDescriptor c2("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, rank, &c2Shape); 65 66 testSchema->AddColumn(c1); 67 testSchema->AddColumn(c2); 68 std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager(); 69 auto op_connector_size = cfg->op_connector_size(); 70 71 std::shared_ptr<RandomDataOp> myRandomDataOp = 72 std::make_shared<RandomDataOp>(1, op_connector_size, 25, std::move(testSchema)); 73 74 rc = myTree->AssociateNode(myRandomDataOp); 75 EXPECT_TRUE(rc.IsOk()); 76 77 rc = myTree->AssignRoot(myRandomDataOp); 78 EXPECT_TRUE(rc.IsOk()); 79 80 std::ostringstream ss; 81 ss << *myRandomDataOp; 82 MS_LOG(INFO) << "RandomDataOp print: %s" << ss.str(); 83 84 MS_LOG(INFO) << "Launching tree and begin iteration"; 85 rc = myTree->Prepare(); 86 EXPECT_TRUE(rc.IsOk()); 87 rc = myTree->Launch(); 88 EXPECT_TRUE(rc.IsOk()); 89 90 // Start the loop of reading tensors from our pipeline 91 DatasetIterator dI(myTree); 92 TensorRow tensorList; 93 rc = dI.FetchNextTensorRow(&tensorList); 94 EXPECT_TRUE(rc.IsOk()); 95 int rowCount = 0; 96 while (!tensorList.empty()) { 97 // Don't display these rows...too big to show 98 MS_LOG(INFO) << "Row fetched #: " << rowCount; 99 100 rc = dI.FetchNextTensorRow(&tensorList); 101 EXPECT_TRUE(rc.IsOk()); 102 rowCount++; 103 } 104 ASSERT_EQ(rowCount, 25); 105 } 106 107 // Test info: 108 // - Simple test with a randomly generated schema 109 // - no iteration loop on this one, just create the op 110 // 111 // Tree: single node tree with RandomDataOp 112 // 113 // RandomDataOp 114 // 115 TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic2) { 116 Status rc; 117 MS_LOG(INFO) << "UT test RandomDataOpBasic2"; 118 119 // Start with an empty execution tree 120 auto myTree = std::make_shared<ExecutionTree>(); 121 122 std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager(); 123 auto op_connector_size = cfg->op_connector_size(); 124 125 std::shared_ptr<RandomDataOp> myRandomDataOp = std::make_shared<RandomDataOp>(1, op_connector_size, 0, nullptr); 126 127 rc = myTree->AssociateNode(myRandomDataOp); 128 EXPECT_TRUE(rc.IsOk()); 129 130 rc = myTree->AssignRoot(myRandomDataOp); 131 EXPECT_TRUE(rc.IsOk()); 132 133 std::ostringstream ss; 134 ss << *myRandomDataOp; 135 MS_LOG(INFO) << "RandomDataOp print: " << ss.str(); 136 } 137 138 // Test info: 139 // - json file test with iteration 140 // 141 // Tree: single node tree with RandomDataOp 142 // 143 // RandomDataOp 144 // 145 TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic3) { 146 Status rc; 147 MS_LOG(INFO) << "UT test RandomDataOpBasic3"; 148 149 // Start with an empty execution tree 150 auto myTree = std::make_shared<ExecutionTree>(); 151 152 std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>(); 153 rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema.json", {}); 154 EXPECT_TRUE(rc.IsOk()); 155 std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager(); 156 auto op_connector_size = cfg->op_connector_size(); 157 158 std::shared_ptr<RandomDataOp> myRandomDataOp = 159 std::make_shared<RandomDataOp>(1, op_connector_size, 10, std::move(testSchema)); 160 161 rc = myTree->AssociateNode(myRandomDataOp); 162 EXPECT_TRUE(rc.IsOk()); 163 164 rc = myTree->AssignRoot(myRandomDataOp); 165 EXPECT_TRUE(rc.IsOk()); 166 167 std::ostringstream ss; 168 ss << *myRandomDataOp; 169 MS_LOG(INFO) << "RandomDataOp print: " << ss.str(); 170 171 MS_LOG(INFO) << "Launching tree and begin iteration"; 172 rc = myTree->Prepare(); 173 EXPECT_TRUE(rc.IsOk()); 174 rc = myTree->Launch(); 175 EXPECT_TRUE(rc.IsOk()); 176 177 // Start the loop of reading tensors from our pipeline 178 DatasetIterator dI(myTree); 179 TensorRow tensorList; 180 rc = dI.FetchNextTensorRow(&tensorList); 181 EXPECT_TRUE(rc.IsOk()); 182 int rowCount = 0; 183 while (!tensorList.empty()) { 184 // Don't display these rows...too big to show 185 MS_LOG(INFO) << "Row fetched #: " << rowCount; 186 187 rc = dI.FetchNextTensorRow(&tensorList); 188 EXPECT_TRUE(rc.IsOk()); 189 rowCount++; 190 } 191 ASSERT_EQ(rowCount, 10); 192 } 193 194 // Test info: 195 // - json schema input it's a fairly simple one 196 // - has an interaction loop 197 // 198 // Tree: RepeatOp over RandomDataOp 199 // 200 // RepeatOp 201 // | 202 // RandomDataOp 203 // 204 TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic4) { 205 Status rc; 206 MS_LOG(INFO) << "UT test RandomDataOpBasic4"; 207 208 // Start with an empty execution tree 209 auto myTree = std::make_shared<ExecutionTree>(); 210 211 std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>(); 212 rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema2.json", {}); 213 EXPECT_TRUE(rc.IsOk()); 214 std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager(); 215 auto op_connector_size = cfg->op_connector_size(); 216 217 std::shared_ptr<RandomDataOp> myRandomDataOp = 218 std::make_shared<RandomDataOp>(1, op_connector_size, 10, std::move(testSchema)); 219 220 rc = myTree->AssociateNode(myRandomDataOp); 221 EXPECT_TRUE(rc.IsOk()); 222 223 uint32_t numRepeats = 2; 224 std::shared_ptr<RepeatOp> myRepeatOp = std::make_shared<RepeatOp>(numRepeats); 225 rc = myTree->AssociateNode(myRepeatOp); 226 EXPECT_TRUE(rc.IsOk()); 227 228 myRandomDataOp->SetTotalRepeats(numRepeats); 229 myRandomDataOp->SetNumRepeatsPerEpoch(numRepeats); 230 rc = myRepeatOp->AddChild(myRandomDataOp); 231 EXPECT_TRUE(rc.IsOk()); 232 233 rc = myTree->AssignRoot(myRepeatOp); 234 EXPECT_TRUE(rc.IsOk()); 235 236 MS_LOG(INFO) << "Launching tree and begin iteration"; 237 rc = myTree->Prepare(); 238 EXPECT_TRUE(rc.IsOk()); 239 rc = myTree->Launch(); 240 EXPECT_TRUE(rc.IsOk()); 241 242 // Start the loop of reading tensors from our pipeline 243 DatasetIterator dI(myTree); 244 TensorRow tensorList; 245 rc = dI.FetchNextTensorRow(&tensorList); 246 EXPECT_TRUE(rc.IsOk()); 247 int rowCount = 0; 248 while (!tensorList.empty()) { 249 MS_LOG(INFO) << "Row display for row #: " << rowCount; 250 251 // Display the tensor by calling the printer on it 252 for (int i = 0; i < tensorList.size(); i++) { 253 std::ostringstream ss; 254 ss << *tensorList[i] << std::endl; 255 MS_LOG(INFO) << "Tensor print: %s" << ss.str(); 256 } 257 258 rc = dI.FetchNextTensorRow(&tensorList); 259 EXPECT_TRUE(rc.IsOk()); 260 rowCount++; 261 } 262 ASSERT_EQ(rowCount, 20); 263 } 264 265 // Test info: 266 // - json schema input it's a fairly simple one 267 // - has an interaction loop 268 // - same as MindDataTestRandomDataOpBasic4 except that this one will have parallel workers 269 // 270 // Tree: RepeatOp over RandomDataOp 271 // 272 // RepeatOp 273 // | 274 // RandomDataOp 275 // 276 TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic5) { 277 Status rc; 278 MS_LOG(INFO) << "UT test RandomDataOpBasic5"; 279 280 // Start with an empty execution tree 281 auto myTree = std::make_shared<ExecutionTree>(); 282 283 std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>(); 284 rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema2.json", {}); 285 EXPECT_TRUE(rc.IsOk()); 286 std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager(); 287 auto op_connector_size = cfg->op_connector_size(); 288 289 std::shared_ptr<RandomDataOp> myRandomDataOp = 290 std::make_shared<RandomDataOp>(4, op_connector_size, 10, std::move(testSchema)); 291 292 rc = myTree->AssociateNode(myRandomDataOp); 293 EXPECT_TRUE(rc.IsOk()); 294 295 uint32_t numRepeats = 3; 296 std::shared_ptr<RepeatOp> myRepeatOp = std::make_shared<RepeatOp>(numRepeats); 297 rc = myTree->AssociateNode(myRepeatOp); 298 EXPECT_TRUE(rc.IsOk()); 299 300 myRandomDataOp->SetTotalRepeats(numRepeats); 301 myRandomDataOp->SetNumRepeatsPerEpoch(numRepeats); 302 rc = myRepeatOp->AddChild(myRandomDataOp); 303 EXPECT_TRUE(rc.IsOk()); 304 305 rc = myTree->AssignRoot(myRepeatOp); 306 EXPECT_TRUE(rc.IsOk()); 307 308 MS_LOG(INFO) << "Launching tree and begin iteration"; 309 rc = myTree->Prepare(); 310 EXPECT_TRUE(rc.IsOk()); 311 rc = myTree->Launch(); 312 EXPECT_TRUE(rc.IsOk()); 313 314 // Start the loop of reading tensors from our pipeline 315 DatasetIterator dI(myTree); 316 TensorRow tensorList; 317 rc = dI.FetchNextTensorRow(&tensorList); 318 EXPECT_TRUE(rc.IsOk()); 319 int rowCount = 0; 320 while (!tensorList.empty()) { 321 MS_LOG(INFO) << "Row display for row #: " << rowCount; 322 323 // Display the tensor by calling the printer on it 324 for (int i = 0; i < tensorList.size(); i++) { 325 std::ostringstream ss; 326 ss << *tensorList[i] << std::endl; 327 MS_LOG(INFO) << "Tensor print: ", ss.str(); 328 } 329 330 rc = dI.FetchNextTensorRow(&tensorList); 331 EXPECT_TRUE(rc.IsOk()); 332 rowCount++; 333 } 334 ASSERT_EQ(rowCount, 30); 335 } 336 337 // Test info: 338 // - repeat shuffle random 339 // 340 // Tree: RepeatOp over RandomDataOp 341 // 342 // RepeatOp 343 // | 344 // ShuffleOp 345 // | 346 // RandomDataOp 347 // 348 TEST_F(MindDataTestRandomDataOp, RandomDataOpTree1) { 349 Status rc; 350 MS_LOG(INFO) << "UT test RandomDataOpTree1"; 351 352 // Start with an empty execution tree 353 auto myTree = std::make_shared<ExecutionTree>(); 354 355 std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>(); 356 rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema2.json", {}); 357 EXPECT_TRUE(rc.IsOk()); 358 std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager(); 359 auto op_connector_size = cfg->op_connector_size(); 360 361 std::shared_ptr<RandomDataOp> myRandomDataOp = 362 std::make_shared<RandomDataOp>(4, op_connector_size, 10, std::move(testSchema)); 363 364 rc = myTree->AssociateNode(myRandomDataOp); 365 EXPECT_TRUE(rc.IsOk()); 366 uint32_t shuffle_seed = GetSeed(); 367 std::shared_ptr<ShuffleOp> myShuffleOp = std::make_shared<ShuffleOp>(4, shuffle_seed, op_connector_size, true); 368 369 rc = myTree->AssociateNode(myShuffleOp); 370 EXPECT_TRUE(rc.IsOk()); 371 372 uint32_t numRepeats = 3; 373 std::shared_ptr<RepeatOp> myRepeatOp = std::make_shared<RepeatOp>(numRepeats); 374 rc = myTree->AssociateNode(myRepeatOp); 375 EXPECT_TRUE(rc.IsOk()); 376 377 myShuffleOp->SetTotalRepeats(numRepeats); 378 myShuffleOp->SetNumRepeatsPerEpoch(numRepeats); 379 rc = myRepeatOp->AddChild(myShuffleOp); 380 EXPECT_TRUE(rc.IsOk()); 381 382 myRandomDataOp->SetTotalRepeats(numRepeats); 383 myRandomDataOp->SetNumRepeatsPerEpoch(numRepeats); 384 rc = myShuffleOp->AddChild(myRandomDataOp); 385 EXPECT_TRUE(rc.IsOk()); 386 387 rc = myTree->AssignRoot(myRepeatOp); 388 EXPECT_TRUE(rc.IsOk()); 389 390 MS_LOG(INFO) << "Launching tree and begin iteration"; 391 rc = myTree->Prepare(); 392 EXPECT_TRUE(rc.IsOk()); 393 rc = myTree->Launch(); 394 EXPECT_TRUE(rc.IsOk()); 395 396 // Start the loop of reading tensors from our pipeline 397 DatasetIterator dI(myTree); 398 TensorRow tensorList; 399 rc = dI.FetchNextTensorRow(&tensorList); 400 EXPECT_TRUE(rc.IsOk()); 401 int rowCount = 0; 402 while (!tensorList.empty()) { 403 MS_LOG(INFO) << "Row display for row #: " << rowCount; 404 405 // Display the tensor by calling the printer on it 406 for (int i = 0; i < tensorList.size(); i++) { 407 std::ostringstream ss; 408 ss << *tensorList[i] << std::endl; 409 MS_LOG(INFO) << "Tensor print: " << ss.str(); 410 } 411 412 rc = dI.FetchNextTensorRow(&tensorList); 413 EXPECT_TRUE(rc.IsOk()); 414 rowCount++; 415 } 416 ASSERT_EQ(rowCount, 30); 417 } 418