1 /** 2 * Copyright 2021 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include "common/common.h" 17 #include "minddata/dataset/core/global_context.h" 18 #include "minddata/dataset/engine/serdes.h" 19 #include "minddata/dataset/include/dataset/datasets.h" 20 #include "minddata/dataset/include/dataset/vision.h" 21 #include "minddata/dataset/include/dataset/transforms.h" 22 #include "minddata/dataset/kernels/ir/data/transforms_ir.h" 23 24 using namespace mindspore::dataset; 25 using mindspore::dataset::DatasetNode; 26 27 using mindspore::dataset::ShuffleMode; 28 using mindspore::dataset::Tensor; 29 30 class MindDataTestDeserialize : public UT::DatasetOpTesting { 31 protected: 32 }; 33 34 void compare_dataset(std::shared_ptr<DatasetNode> ds) { 35 nlohmann::json out_json; 36 ASSERT_OK(Serdes::SaveToJSON(ds, "dataset_pipeline.json", &out_json)); 37 // output the deserialized out_json to ds1 and then out_json1 38 std::shared_ptr<DatasetNode> ds1; 39 ASSERT_OK(Serdes::Deserialize("dataset_pipeline.json", &ds1)); 40 EXPECT_NE(ds1, nullptr); 41 42 // check original and deserialized dataset are the same 43 nlohmann::json out_json1; 44 ASSERT_OK(Serdes::SaveToJSON(ds1, "dataset_pipeline_1.json", &out_json1)); 45 std::stringstream json_ss; 46 json_ss << out_json; 47 std::stringstream json_ss1; 48 json_ss1 << out_json1; 49 EXPECT_EQ(json_ss.str(), json_ss1.str()); 50 return; 51 } 52 53 // test mnist dataset, and special cases of tensor operations (no input or tensor operation input) 54 TEST_F(MindDataTestDeserialize, TestDeserializeMnist) { 55 MS_LOG(INFO) << "Doing MindDataTestDeserialize-Minist."; 56 std::string data_dir = "./data/dataset/testMnistData"; 57 std::string usage = "all"; 58 std::shared_ptr<SamplerObj> sampler = std::make_shared<RandomSamplerObj>(true, 100); 59 std::shared_ptr<DatasetNode> ds = std::make_shared<MnistNode>(data_dir, usage, sampler, nullptr); 60 std::shared_ptr<TensorOperation> operation0 = std::make_shared<vision::EqualizeOperation>(); 61 std::shared_ptr<TensorOperation> operation1 = std::make_shared<vision::BoundingBoxAugmentOperation>(operation0, 0.5); 62 std::shared_ptr<TensorOperation> operation2 = std::make_shared<vision::HorizontalFlipOperation>(); 63 std::shared_ptr<TensorOperation> operation3 = std::make_shared<vision::HwcToChwOperation>(); 64 std::shared_ptr<TensorOperation> operation4 = std::make_shared<vision::RgbaToBgrOperation>(); 65 std::shared_ptr<TensorOperation> operation5 = std::make_shared<vision::RgbaToRgbOperation>(); 66 std::shared_ptr<TensorOperation> operation6 = std::make_shared<vision::SwapRedBlueOperation>(); 67 std::vector<std::vector<std::pair<std::shared_ptr<TensorOperation>, double>>> policy; 68 std::vector<std::pair<std::shared_ptr<TensorOperation>, double>> sub_policy; 69 sub_policy.push_back(std::make_pair(operation1, 0.4)); 70 policy.push_back(sub_policy); 71 std::shared_ptr<TensorOperation> operation7 = std::make_shared<vision::RandomSelectSubpolicyOperation>(policy); 72 std::vector<std::shared_ptr<TensorOperation>> transforms; 73 transforms.push_back(operation2); 74 transforms.push_back(operation3); 75 transforms.push_back(operation4); 76 std::shared_ptr<TensorOperation> operation8 = std::make_shared<vision::UniformAugOperation>(transforms, 3); 77 transforms.push_back(operation5); 78 transforms.push_back(operation6); 79 transforms.push_back(operation7); 80 transforms.push_back(operation8); 81 ds = std::make_shared<MapNode>(ds, transforms); 82 ds = std::make_shared<BatchNode>(ds, 10, true); 83 compare_dataset(ds); 84 } 85 86 // test celeba dataset and part of the tensor operation 87 TEST_F(MindDataTestDeserialize, TestDeserializeCelebA) { 88 MS_LOG(INFO) << "Doing MindDataTestDeserialize-CelebA."; 89 std::string data_dir = "./data/dataset/testCelebAData/"; 90 std::string usage = "all"; 91 std::shared_ptr<SamplerObj> sampler = std::make_shared<DistributedSamplerObj>(1, 0, true, 2, 1, 1, true); 92 bool decode = true; 93 std::set<std::string> extensions = {}; 94 std::shared_ptr<DatasetCache> cache = nullptr; 95 std::shared_ptr<DatasetNode> ds = std::make_shared<CelebANode>(data_dir, usage, sampler, decode, extensions, cache); 96 std::vector<int32_t> size = {80, 80}; 97 std::vector<int32_t> size1 = {80, 80}; 98 std::vector<int32_t> coordinates = {5, 5}; 99 std::vector<int32_t> padding = {20, 20, 20, 20}; 100 std::vector<uint8_t> fill_value = {20, 20, 20}; 101 std::vector<uint32_t> ignore = {20, 20, 20, 20}; 102 std::vector<float> mean = {2.0, 2.0, 2.0, 2.0}; 103 std::vector<float> std = {0.5, 0.5, 0.5, 0.5}; 104 std::vector<float> translation = {0.5, 0.5}; 105 std::vector<float> shear = {0.5, 0.5}; 106 std::vector<float> sigma = {0.5, 0.5}; 107 InterpolationMode interpolation = InterpolationMode::kLinear; 108 std::shared_ptr<TensorOperation> operation0 = 109 std::make_shared<vision::AffineOperation>(0.0, translation, 0.5, shear, interpolation, fill_value); 110 std::shared_ptr<TensorOperation> operation1 = std::make_shared<vision::AutoContrastOperation>(0.5, ignore); 111 std::shared_ptr<TensorOperation> operation2 = std::make_shared<vision::CenterCropOperation>(size); 112 std::shared_ptr<TensorOperation> operation3 = 113 std::make_shared<vision::CutMixBatchOperation>(ImageBatchFormat::kNHWC, 0.1, 0.1); 114 std::shared_ptr<TensorOperation> operation4 = std::make_shared<vision::CutOutOperation>(1, 1); 115 std::shared_ptr<TensorOperation> operation5 = std::make_shared<vision::DecodeOperation>(true); 116 std::shared_ptr<TensorOperation> operation6 = std::make_shared<vision::GaussianBlurOperation>(coordinates, sigma); 117 std::shared_ptr<TensorOperation> operation7 = std::make_shared<vision::MixUpBatchOperation>(1.0); 118 std::shared_ptr<TensorOperation> operation8 = std::make_shared<vision::NormalizeOperation>(mean, std); 119 std::shared_ptr<TensorOperation> operation9 = std::make_shared<vision::NormalizePadOperation>(mean, std, "float"); 120 std::shared_ptr<TensorOperation> operation10 = 121 std::make_shared<vision::PadOperation>(padding, fill_value, BorderType::kConstant); 122 std::shared_ptr<TensorOperation> operation11 = std::make_shared<vision::RescaleOperation>(1.0, 0.5); 123 std::shared_ptr<TensorOperation> operation12 = std::make_shared<vision::ResizePreserveAROperation>(10, 10, 0); 124 std::shared_ptr<TensorOperation> operation13 = std::make_shared<vision::ResizeWithBBoxOperation>(size, interpolation); 125 std::shared_ptr<TensorOperation> operation14 = std::make_shared<vision::ResizeOperation>(size, interpolation); 126 std::vector<std::shared_ptr<TensorOperation>> operations; 127 operations.push_back(operation0); 128 operations.push_back(operation1); 129 operations.push_back(operation2); 130 operations.push_back(operation3); 131 operations.push_back(operation4); 132 operations.push_back(operation5); 133 operations.push_back(operation6); 134 operations.push_back(operation7); 135 operations.push_back(operation8); 136 operations.push_back(operation9); 137 operations.push_back(operation10); 138 operations.push_back(operation11); 139 operations.push_back(operation12); 140 operations.push_back(operation13); 141 operations.push_back(operation14); 142 ds = std::make_shared<RepeatNode>(ds, 2); 143 ds = std::make_shared<MapNode>(ds, operations); 144 compare_dataset(ds); 145 } 146 147 // test cifar10 dataset and random tensor operations 148 TEST_F(MindDataTestDeserialize, TestDeserializeCifar10) { 149 MS_LOG(INFO) << "Doing MindDataTestDeserialize-Cifar10."; 150 std::string data_dir = "./data/dataset/testCifar10Data"; 151 std::string usage = "all"; 152 std::shared_ptr<DatasetCache> cache = nullptr; 153 std::shared_ptr<SamplerObj> sampler = std::make_shared<SequentialSamplerObj>(0, 10); 154 std::shared_ptr<DatasetNode> ds = std::make_shared<Cifar10Node>(data_dir, usage, sampler, cache); 155 std::vector<float> center = {50.0, 50.0}; 156 std::vector<uint8_t> threshold = {5, 5}; 157 std::vector<uint8_t> fill_value = {150, 150, 150}; 158 std::vector<uint8_t> bit_range = {5, 15}; 159 std::vector<float> degrees = {0.0, 0.0}; 160 std::vector<float> scale = {0.5, 0.5}; 161 std::vector<float> ratio = {0.5, 0.5}; 162 std::vector<int32_t> size = {224, 224}; 163 std::vector<int32_t> padding = {20, 20, 20, 20}; 164 std::vector<float_t> translate_range = {0.0, 0.0, 0.0, 0.0}; 165 std::vector<float_t> scale_range = {1.0, 1.0}; 166 std::vector<float_t> shear_ranges = {0.0, 0.0, 0.0, 0.0}; 167 InterpolationMode interpolation = InterpolationMode::kLinear; 168 std::shared_ptr<TensorOperation> operation1 = std::make_shared<vision::RandomRotationOperation>( 169 degrees, InterpolationMode::kNearestNeighbour, true, center, fill_value); 170 std::shared_ptr<TensorOperation> operation2 = std::make_shared<vision::RandomAffineOperation>( 171 degrees, translate_range, scale_range, shear_ranges, interpolation, fill_value); 172 std::shared_ptr<TensorOperation> operation3 = std::make_shared<vision::RandomColorOperation>(0.5, 10.5); 173 std::shared_ptr<TensorOperation> operation4 = 174 std::make_shared<vision::RandomCropDecodeResizeOperation>(size, scale, ratio, interpolation, 2); 175 std::shared_ptr<TensorOperation> operation5 = 176 std::make_shared<vision::RandomCropWithBBoxOperation>(size, padding, true, fill_value, BorderType::kConstant); 177 std::shared_ptr<TensorOperation> operation6 = std::make_shared<vision::RandomHorizontalFlipOperation>(0.1); 178 std::shared_ptr<TensorOperation> operation7 = std::make_shared<vision::RandomHorizontalFlipWithBBoxOperation>(0.1); 179 std::shared_ptr<TensorOperation> operation8 = std::make_shared<vision::RandomPosterizeOperation>(bit_range); 180 std::shared_ptr<TensorOperation> operation9 = std::make_shared<vision::RandomResizeOperation>(size); 181 std::shared_ptr<TensorOperation> operation10 = std::make_shared<vision::RandomResizeWithBBoxOperation>(size); 182 std::shared_ptr<TensorOperation> operation11 = 183 std::make_shared<vision::RandomResizedCropOperation>(size, scale, ratio, interpolation, 2); 184 std::shared_ptr<TensorOperation> operation12 = 185 std::make_shared<vision::RandomResizedCropWithBBoxOperation>(size, scale, ratio, interpolation, 2); 186 std::shared_ptr<TensorOperation> operation13 = 187 std::make_shared<vision::RandomRotationOperation>(degrees, interpolation, true, center, fill_value); 188 std::shared_ptr<TensorOperation> operation14 = std::make_shared<vision::RandomSharpnessOperation>(degrees); 189 std::shared_ptr<TensorOperation> operation15 = std::make_shared<vision::RandomSolarizeOperation>(threshold); 190 std::shared_ptr<TensorOperation> operation16 = std::make_shared<vision::RandomVerticalFlipOperation>(0.1); 191 std::shared_ptr<TensorOperation> operation17 = std::make_shared<vision::RandomVerticalFlipWithBBoxOperation>(0.1); 192 std::vector<std::shared_ptr<TensorOperation>> operations; 193 operations.push_back(operation1); 194 operations.push_back(operation2); 195 operations.push_back(operation3); 196 operations.push_back(operation4); 197 operations.push_back(operation5); 198 operations.push_back(operation6); 199 operations.push_back(operation7); 200 operations.push_back(operation8); 201 operations.push_back(operation9); 202 operations.push_back(operation10); 203 operations.push_back(operation11); 204 operations.push_back(operation12); 205 operations.push_back(operation13); 206 operations.push_back(operation14); 207 operations.push_back(operation15); 208 operations.push_back(operation16); 209 operations.push_back(operation17); 210 ds = std::make_shared<MapNode>(ds, operations); 211 ds = std::make_shared<BatchNode>(ds, 1, true); 212 ds = std::make_shared<SkipNode>(ds, 1); 213 compare_dataset(ds); 214 } 215 216 TEST_F(MindDataTestDeserialize, TestDeserializeCifar100) { 217 MS_LOG(INFO) << "Doing MindDataTestDeserialize-Cifar100."; 218 std::string data_dir = "./data/dataset/testCifar100Data"; 219 std::string usage = "all"; 220 std::shared_ptr<DatasetCache> cache = nullptr; 221 std::shared_ptr<SamplerObj> sampler = std::make_shared<SequentialSamplerObj>(0, 10); 222 std::shared_ptr<DatasetNode> ds = std::make_shared<Cifar100Node>(data_dir, usage, sampler, cache); 223 ds = std::make_shared<TakeNode>(ds, 6); 224 std::shared_ptr<TensorOperation> operation = std::make_shared<vision::HorizontalFlipOperation>(); 225 std::vector<std::shared_ptr<TensorOperation>> ops = {operation}; 226 ds = std::make_shared<MapNode>(ds, ops); 227 std::vector<std::shared_ptr<TensorOperation>> operations; 228 std::vector<int32_t> size = {32, 32}; 229 std::vector<int32_t> padding = {4, 4, 4, 4}; 230 bool pad_if_needed = false; 231 std::vector<uint8_t> fill_value = {4, 4, 4}; 232 InterpolationMode interpolation = InterpolationMode::kLinear; 233 std::shared_ptr<TensorOperation> operation1 = 234 std::make_shared<vision::RandomCropOperation>(size, padding, pad_if_needed, fill_value, BorderType::kConstant); 235 size = {224, 224}; 236 std::shared_ptr<TensorOperation> operation2 = std::make_shared<vision::ResizeOperation>(size, interpolation); 237 std::shared_ptr<TensorOperation> operation3 = std::make_shared<vision::RescaleOperation>(0.5, 0.0); 238 std::vector<float> mean = {0.49, 0.48, 0.46}; 239 std::vector<float> std = {0.20, 0.199, 0.201}; 240 std::shared_ptr<TensorOperation> operation4 = std::make_shared<vision::NormalizeOperation>(mean, std); 241 operations.push_back(operation1); 242 operations.push_back(operation2); 243 operations.push_back(operation3); 244 operations.push_back(operation4); 245 ds = std::make_shared<MapNode>(ds, operations); 246 ds = std::make_shared<BatchNode>(ds, 3, true); 247 ds = std::make_shared<RepeatNode>(ds, 1); 248 compare_dataset(ds); 249 } 250 251 TEST_F(MindDataTestDeserialize, TestDeserializeCSV) { 252 MS_LOG(INFO) << "Doing MindDataTestDeserialize-CSV."; 253 std::string data_file = "./data/dataset/testCSV/1.csv"; 254 std::vector<std::string> dataset_files = {data_file}; 255 char field_delim = ','; 256 std::vector<std::string> column_names = {"col1", "col2", "col3", "col4"}; 257 std::vector<std::string> columns = {"col1", "col4", "col2"}; 258 std::vector<std::shared_ptr<CsvBase>> column_defaults = {}; 259 std::shared_ptr<DatasetCache> cache = nullptr; 260 std::shared_ptr<SamplerObj> sampler = std::make_shared<SequentialSamplerObj>(0, 10); 261 std::shared_ptr<DatasetNode> ds = std::make_shared<CSVNode>(dataset_files, field_delim, column_defaults, column_names, 262 3, ShuffleMode::kGlobal, 1, 0, cache); 263 ds = std::make_shared<ProjectNode>(ds, columns); 264 compare_dataset(ds); 265 } 266 267 TEST_F(MindDataTestDeserialize, TestDeserializeImageFolder) { 268 MS_LOG(INFO) << "Doing MindDataTestDeserialize-ImageFolder."; 269 std::string dataset_dir = "./data/dataset/testPK/data"; 270 std::shared_ptr<SamplerObj> child_sampler = std::make_shared<PKSamplerObj>(3, true, 1); 271 std::vector<double> weights = {1.0, 0.1, 0.02, 0.3, 0.4, 0.05, 1.2, 0.13, 0.14, 0.015, 0.16, 1.1}; 272 std::set<std::string> extensions = {}; 273 std::shared_ptr<DatasetCache> cache = nullptr; 274 std::map<std::string, int32_t> class_indexing = {}; 275 std::shared_ptr<SamplerObj> sampler = std::make_shared<WeightedRandomSamplerObj>(weights, 11); 276 sampler->AddChildSampler(child_sampler); 277 std::shared_ptr<DatasetNode> ds = 278 std::make_shared<ImageFolderNode>(dataset_dir, false, sampler, false, extensions, class_indexing, cache); 279 ds = std::make_shared<RepeatNode>(ds, 1); 280 std::vector<int32_t> size = {224, 224}; 281 std::vector<float> scale = {0.5, 0.5}; 282 std::vector<float> ratio = {0.5, 0.5}; 283 std::vector<float> center = {50.0, 50.0}; 284 std::vector<uint8_t> fill_value = {150, 150, 150}; 285 InterpolationMode interpolation = InterpolationMode::kLinear; 286 std::shared_ptr<TensorOperation> operation1 = std::make_shared<vision::SoftDvppDecodeResizeJpegOperation>(size); 287 std::vector<std::shared_ptr<TensorOperation>> ops = {operation1}; 288 ds = std::make_shared<MapNode>(ds, ops); 289 std::vector<std::shared_ptr<TensorOperation>> operations; 290 std::shared_ptr<TensorOperation> operation2 = 291 std::make_shared<vision::SoftDvppDecodeRandomCropResizeJpegOperation>(size, scale, ratio, 2); 292 std::shared_ptr<TensorOperation> operation3 = 293 std::make_shared<vision::RotateOperation>(0.5, interpolation, true, center, fill_value); 294 operations.push_back(operation2); 295 operations.push_back(operation3); 296 ds = std::make_shared<MapNode>(ds, operations); 297 ds = std::make_shared<BatchNode>(ds, 2, true); 298 compare_dataset(ds); 299 } 300 301 TEST_F(MindDataTestDeserialize, TestDeserializeManifest) { 302 MS_LOG(INFO) << "Doing MindDataTestDeserialize-Manifest."; 303 std::string data_file = "./data/dataset/testManifestData/cpp.json"; 304 std::shared_ptr<SamplerObj> sampler = std::make_shared<SequentialSamplerObj>(0, 10); 305 std::map<std::string, int32_t> class_indexing = {}; 306 std::shared_ptr<DatasetCache> cache = nullptr; 307 std::shared_ptr<DatasetNode> ds = 308 std::make_shared<ManifestNode>(data_file, "train", sampler, class_indexing, false, cache); 309 std::vector<int32_t> coordinates = {50, 50}; 310 std::vector<int32_t> size = {224, 224}; 311 std::shared_ptr<TensorOperation> operation1 = std::make_shared<vision::CropOperation>(coordinates, size); 312 std::shared_ptr<TensorOperation> operation2 = std::make_shared<vision::RgbToBgrOperation>(); 313 std::shared_ptr<TensorOperation> operation3 = std::make_shared<vision::RgbToGrayOperation>(); 314 std::shared_ptr<TensorOperation> operation4 = 315 std::make_shared<vision::SlicePatchesOperation>(5, 5, SliceMode::kDrop, 1); 316 std::shared_ptr<TensorOperation> operation5 = std::make_shared<vision::VerticalFlipOperation>(); 317 std::vector<std::shared_ptr<TensorOperation>> operations; 318 operations.push_back(operation1); 319 operations.push_back(operation2); 320 operations.push_back(operation3); 321 operations.push_back(operation4); 322 operations.push_back(operation5); 323 ds = std::make_shared<MapNode>(ds, operations); 324 ds = std::make_shared<BatchNode>(ds, 2, false); 325 compare_dataset(ds); 326 } 327 328 TEST_F(MindDataTestDeserialize, TestDeserializeVOC) { 329 MS_LOG(INFO) << "Doing MindDataTestDeserialize-VOC."; 330 std::string dataset_dir = "./data/dataset/testVOC2012"; 331 std::vector<int64_t> indices = {0, 1}; 332 std::shared_ptr<SamplerObj> sampler = std::make_shared<SubsetRandomSamplerObj>(indices, 3); 333 std::string task = "Detection"; 334 std::string usage = "train"; 335 std::map<std::string, int32_t> class_indexing = {}; 336 std::shared_ptr<DatasetCache> cache = nullptr; 337 std::shared_ptr<DatasetNode> ds = 338 std::make_shared<VOCNode>(dataset_dir, task, usage, class_indexing, true, sampler, cache); 339 std::vector<float> brightness = {0.5, 0.5}; 340 std::vector<float> contrast = {1.0, 1.0}; 341 std::vector<float> hue = {0.0, 0.0}; 342 std::vector<float> saturation = {1.0, 1.0}; 343 std::shared_ptr<TensorOperation> operation = 344 std::make_shared<vision::RandomColorAdjustOperation>(brightness, contrast, saturation, hue); 345 std::vector<std::shared_ptr<TensorOperation>> ops = {operation}; 346 ds = std::make_shared<MapNode>(ds, ops); 347 ds = std::make_shared<SkipNode>(ds, 2); 348 compare_dataset(ds); 349 } 350 351 TEST_F(MindDataTestDeserialize, TestDeserializeCLUE) { 352 MS_LOG(INFO) << "Doing MindDataTestDeserialize-CLUE."; 353 std::string train_file = "./data/dataset/testCLUE/afqmc/train.json"; 354 std::string task = "AFQMC"; 355 std::string usage = "train"; 356 std::vector<std::string> files = {train_file}; 357 std::shared_ptr<DatasetCache> cache = nullptr; 358 std::shared_ptr<DatasetNode> ds = std::make_shared<CLUENode>(files, task, usage, 1, ShuffleMode::kFalse, 1, 0, cache); 359 ds = std::make_shared<RepeatNode>(ds, 1); 360 std::shared_ptr<TensorOperation> operation1 = std::make_shared<vision::DecodeOperation>(true); 361 std::vector<std::shared_ptr<TensorOperation>> ops = {operation1}; 362 ds = std::make_shared<MapNode>(ds, ops); 363 compare_dataset(ds); 364 } 365 366 TEST_F(MindDataTestDeserialize, TestDeserializeCoco) { 367 MS_LOG(INFO) << "Doing MindDataTestDeserialize-Coco."; 368 std::string folder_path = "./data/dataset/testCOCO/train"; 369 std::string annotation_file = "./data/dataset/testCOCO/annotations/train.json"; 370 std::string task = "Detection"; 371 std::vector<int64_t> indices = {0, 1}; 372 std::shared_ptr<SamplerObj> sampler = std::make_shared<SubsetRandomSamplerObj>(indices, 3); 373 std::shared_ptr<DatasetCache> cache = nullptr; 374 std::shared_ptr<DatasetNode> ds = 375 std::make_shared<CocoNode>(folder_path, annotation_file, task, true, sampler, cache, false); 376 std::vector<uint8_t> fill_value = {150, 150, 150}; 377 std::vector<float> degrees = {0.0, 0.0}; 378 std::vector<float> scale = {0.5, 0.5}; 379 std::vector<float> ratio = {0.5, 0.5}; 380 std::vector<int32_t> size = {224, 224}; 381 std::vector<int32_t> padding = {20, 20, 20, 20}; 382 InterpolationMode interpolation = InterpolationMode::kLinear; 383 std::shared_ptr<TensorOperation> operation1 = 384 std::make_shared<vision::RandomCropDecodeResizeOperation>(size, scale, ratio, interpolation, 2); 385 std::shared_ptr<TensorOperation> operation2 = 386 std::make_shared<vision::RandomCropWithBBoxOperation>(size, padding, true, fill_value, BorderType::kConstant); 387 std::shared_ptr<TensorOperation> operation3 = std::make_shared<vision::RandomHorizontalFlipOperation>(0.1); 388 std::shared_ptr<TensorOperation> operation4 = std::make_shared<vision::RandomHorizontalFlipWithBBoxOperation>(0.1); 389 std::vector<std::shared_ptr<TensorOperation>> operations; 390 operations.push_back(operation1); 391 operations.push_back(operation2); 392 operations.push_back(operation3); 393 operations.push_back(operation4); 394 ds = std::make_shared<MapNode>(ds, operations); 395 compare_dataset(ds); 396 } 397 398 TEST_F(MindDataTestDeserialize, TestDeserializeTFRecord) { 399 MS_LOG(INFO) << "Doing MindDataTestDeserialize-TFRecord."; 400 int num_samples = 12; 401 int32_t num_shards = 1; 402 int32_t shard_id = 0; 403 bool shard_equal_rows = false; 404 std::shared_ptr<DatasetCache> cache = nullptr; 405 std::vector<std::string> columns_list = {}; 406 std::vector<std::string> dataset_files = {"./data/dataset/testTFTestAllTypes/test.data"}; 407 408 std::shared_ptr<SchemaObj> schema = Schema(); 409 ASSERT_OK(schema->add_column("col1", mindspore::DataType::kNumberTypeInt32, {4})); 410 ASSERT_OK(schema->add_column("col2", mindspore::DataType::kNumberTypeInt64, {4})); 411 412 std::shared_ptr<DatasetNode> ds = 413 std::make_shared<TFRecordNode>(dataset_files, schema, columns_list, num_samples, ShuffleMode::kFiles, num_shards, 414 shard_id, shard_equal_rows, cache); 415 ds = std::make_shared<ShuffleNode>(ds, 10000, true); 416 std::vector<std::string> input_columns = {"col_sint16", "col_sint32", "col_sint64", "col_float", 417 "col_1d", "col_2d", "col_3d", "col_binary"}; 418 std::vector<std::string> output_columns = {"column_sint16", "column_sint32", "column_sint64", "column_float", 419 "column_1d", "column_2d", "column_3d", "column_binary"}; 420 std::shared_ptr<TensorOperation> operation = std::make_shared<vision::InvertOperation>(); 421 std::vector<std::shared_ptr<TensorOperation>> ops = {operation}; 422 ds = std::make_shared<MapNode>(ds, ops, input_columns, output_columns); 423 std::string train_file = "./data/dataset/testCLUE/afqmc/train.json"; 424 std::string task1 = "AFQMC"; 425 std::string usage = "train"; 426 std::vector<std::string> files = {train_file}; 427 std::shared_ptr<DatasetNode> ds_child1 = 428 std::make_shared<CLUENode>(files, task1, usage, 0, ShuffleMode::kFalse, 1, 0, cache); 429 std::vector<std::string> dataset_files2 = {"./data/dataset/testTextFileDataset/1.txt"}; 430 std::shared_ptr<DatasetNode> ds_child2 = 431 std::make_shared<TextFileNode>(dataset_files2, 2, ShuffleMode::kFiles, 1, 0, cache); 432 std::vector<std::shared_ptr<DatasetNode>> datasets = {ds, ds_child1, ds_child2}; 433 ds = std::make_shared<ZipNode>(datasets); 434 compare_dataset(ds); 435 } 436 437 TEST_F(MindDataTestDeserialize, TestDeserializeTextfile) { 438 MS_LOG(INFO) << "Doing MindDataTestDeserialize-Textfile."; 439 std::vector<std::string> dataset_files = {"./data/dataset/testTextFileDataset/1.txt"}; 440 std::shared_ptr<DatasetCache> cache = nullptr; 441 std::shared_ptr<DatasetNode> ds = std::make_shared<TextFileNode>(dataset_files, 2, ShuffleMode::kFiles, 1, 0, cache); 442 std::shared_ptr<TensorOperation> operation = std::make_shared<vision::InvertOperation>(); 443 std::vector<std::shared_ptr<TensorOperation>> ops = {operation}; 444 ds = std::make_shared<MapNode>(ds, ops); 445 ds = std::make_shared<BatchNode>(ds, 10, true); 446 compare_dataset(ds); 447 } 448 449 TEST_F(MindDataTestDeserialize, TestDeserializeInvalidJson) { 450 std::shared_ptr<DatasetNode> ds; 451 // check the invalid json path would return error 452 ASSERT_ERROR(Serdes::Deserialize("invalid_dataset.json", &ds)); 453 // check the invalid json object would return error 454 ASSERT_ERROR(Serdes::Deserialize("./data/dataset/testDataset1/datasetTestInvalidJson.json", &ds)); 455 EXPECT_EQ(ds, nullptr); 456 } 457 458 TEST_F(MindDataTestDeserialize, TestDeserializeFill) { 459 MS_LOG(INFO) << "Doing MindDataTestDeserialize-Fill."; 460 std::vector<std::string> dataset_files = {"./data/dataset/testTextFileDataset/1.txt"}; 461 std::shared_ptr<DatasetCache> cache = nullptr; 462 std::shared_ptr<DatasetNode> ds = std::make_shared<TextFileNode>(dataset_files, 2, ShuffleMode::kFiles, 1, 0, cache); 463 std::shared_ptr<Tensor> fill_value; 464 ASSERT_OK(Tensor::CreateScalar(true, &fill_value)); 465 std::shared_ptr<TensorOperation> operation1 = std::make_shared<transforms::FillOperation>(fill_value); 466 std::shared_ptr<TensorOperation> operation2 = std::make_shared<text::ToNumberOperation>("int32_t"); 467 std::vector<std::shared_ptr<TensorOperation>> ops = {operation1, operation2}; 468 ds = std::make_shared<MapNode>(ds, ops); 469 ds = std::make_shared<TransferNode>(ds, "queue", "type", 1, true, 10, true); 470 compare_dataset(ds); 471 } 472 473 TEST_F(MindDataTestDeserialize, TestDeserializeTensor) { 474 MS_LOG(INFO) << "Doing MindDataTestDeserialize-Tensor."; 475 std::shared_ptr<Tensor> test_tensor; 476 std::vector<float> input = {1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 1.2, 0.7, 0.8, 0.9, 1.0, 2.0, 1.3, 3.0, 4.0}; 477 ASSERT_OK(Tensor::CreateFromVector(input, TensorShape{3, 5}, &test_tensor)); 478 nlohmann::json json_obj; 479 ASSERT_OK(test_tensor->to_json(&json_obj)); 480 std::shared_ptr<Tensor> test_tensor1; 481 ASSERT_OK(Tensor::from_json(json_obj, &test_tensor1)); 482 nlohmann::json json_obj1; 483 ASSERT_OK(test_tensor1->to_json(&json_obj1)); 484 std::stringstream json_ss; 485 json_ss << json_obj; 486 std::stringstream json_ss1; 487 json_ss1 << json_obj1; 488 EXPECT_EQ(json_ss.str(), json_ss1.str()); 489 } 490 491 // Helper function to get the session id from SESSION_ID env variable 492 Status GetSessionFromEnv(session_id_type *session_id); 493 494 TEST_F(MindDataTestDeserialize, DISABLED_TestDeserializeCache) { 495 MS_LOG(INFO) << "Doing MindDataTestDeserialize-Cache."; 496 std::string data_dir = "./data/dataset/testCache"; 497 std::string usage = "all"; 498 session_id_type env_session; 499 ASSERT_TRUE(GetSessionFromEnv(&env_session)); 500 std::shared_ptr<DatasetCache> some_cache = CreateDatasetCache(env_session, 0, false, "127.0.0.1", 50052, 1, 1); 501 502 std::shared_ptr<SamplerObj> sampler = std::make_shared<SequentialSamplerObj>(0, 10); 503 std::shared_ptr<DatasetNode> ds = std::make_shared<Cifar10Node>(data_dir, usage, sampler, some_cache); 504 compare_dataset(ds); 505 } 506 507 TEST_F(MindDataTestDeserialize, TestDeserializeConcatAlbumFlickr) { 508 MS_LOG(INFO) << "Doing MindDataTestDeserialize-ConcatAlbumFlickr."; 509 std::string dataset_dir = "./data/dataset/testAlbum"; 510 std::vector<std::string> column_names = {"col1", "col2", "col3"}; 511 bool decode = false; 512 std::shared_ptr<SamplerObj> sampler = std::make_shared<SequentialSamplerObj>(0, 10); 513 std::string data_schema = "./data/dataset/testAlbum/datasetSchema.json"; 514 std::shared_ptr<DatasetNode> ds = 515 std::make_shared<AlbumNode>(dataset_dir, data_schema, column_names, decode, sampler, nullptr); 516 std::shared_ptr<TensorOperation> operation = std::make_shared<vision::AdjustGammaOperation>(0.5, 0.5); 517 std::vector<std::shared_ptr<TensorOperation>> ops = {operation}; 518 ds = std::make_shared<MapNode>(ds, ops); 519 std::string dataset_path = "./data/dataset/testFlickrData/flickr30k/flickr30k-images"; 520 std::string annotation_file = "./data/dataset/testFlickrData/flickr30k/test1.token"; 521 std::shared_ptr<DatasetNode> ds_child1 = 522 std::make_shared<FlickrNode>(dataset_path, annotation_file, decode, sampler, nullptr); 523 std::vector<std::shared_ptr<DatasetNode>> datasets = {ds, ds_child1}; 524 std::pair<int, int> pair = std::make_pair(1, 1); 525 std::vector<std::pair<int, int>> children_flag_and_nums = {pair}; 526 std::vector<std::pair<int, int>> children_start_end_index = {pair}; 527 ds = std::make_shared<ConcatNode>(datasets, sampler, children_flag_and_nums, children_start_end_index); 528 compare_dataset(ds); 529 } 530 531 TEST_F(MindDataTestDeserialize, TestDeserializePyFunc) { 532 MS_LOG(INFO) << "Doing MindDataTestDeserialize-PyFunc."; 533 std::shared_ptr<DatasetNode> ds1; 534 ASSERT_OK(Serdes::Deserialize("./data/dataset/tf_file_dataset/pyvision_dataset_pipeline.json", &ds1)); 535 EXPECT_NE(ds1, nullptr); 536 }