1 /** 2 * Copyright 2020-2021 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include <stdio.h> 17 #include "common/common.h" 18 #include "minddata/dataset/include/dataset/datasets.h" 19 #include "minddata/dataset/include/dataset/transforms.h" 20 21 using namespace mindspore::dataset; 22 23 class MindDataTestPipeline : public UT::DatasetOpTesting { 24 protected: 25 }; 26 27 TEST_F(MindDataTestPipeline, TestSaveCifar10AndLoad) { 28 MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSaveCifar10AndLoad(single mindrecord file)."; 29 30 // Stage 1: load original dataset 31 // Create a Cifar10 Dataset 32 std::string folder_path = datasets_root_path_ + "/testCifar10Data/"; 33 std::shared_ptr<Dataset> ds = Cifar10(folder_path, "all", std::make_shared<SequentialSampler>(0, 10)); 34 EXPECT_NE(ds, nullptr); 35 36 // Create an iterator over the result of the above dataset 37 // This will trigger the creation of the Execution Tree and launch it. 38 std::shared_ptr<Iterator> iter = ds->CreateIterator(); 39 EXPECT_NE(iter, nullptr); 40 41 // Iterate the dataset and get each row 42 std::unordered_map<std::string, mindspore::MSTensor> row; 43 std::vector<mindspore::MSTensor> original_data; 44 ASSERT_OK(iter->GetNextRow(&row)); 45 46 // Save original data for comparison 47 uint64_t i = 0; 48 while (row.size() != 0) { 49 auto label = row["label"]; 50 original_data.push_back(label); 51 TEST_MS_LOG_MSTENSOR(INFO, "Tensor label: ", label); 52 53 ASSERT_OK(iter->GetNextRow(&row)); 54 i++; 55 } 56 57 // Expect 10 samples 58 EXPECT_EQ(i, 10); 59 // Manually terminate the pipeline 60 iter->Stop(); 61 62 // Stage 2: Save data processed by the dataset pipeline 63 // Create an iterator over the result of the above dataset 64 // This will trigger the creation of the Execution Tree and launch it. 65 std::string temp_file = datasets_root_path_ + "/testCifar10Data/mind.mind"; 66 std::string temp_file_db = datasets_root_path_ + "/testCifar10Data/mind.mind.db"; 67 bool rc = ds->Save(temp_file); 68 // if save fails, no need to continue the execution 69 // save could fail if temp_file already exists 70 ASSERT_EQ(rc, true); 71 72 // Stage 3: Load dataset from file output by stage 2 73 // Create a MindData Dataset 74 std::shared_ptr<Dataset> ds_minddata = MindData(temp_file, {}, std::make_shared<SequentialSampler>(0, 10)); 75 76 // Create objects for the tensor ops 77 // uint32 will be casted to int64 implicitly in mindrecord file, so we have to cast it back to uint32 78 std::shared_ptr<TensorTransform> type_cast = 79 std::make_shared<transforms::TypeCast>(mindspore::DataType::kNumberTypeUInt32); 80 EXPECT_NE(type_cast, nullptr); 81 82 // Create a Map operation on ds 83 ds_minddata = ds_minddata->Map({type_cast}, {"label"}); 84 EXPECT_NE(ds_minddata, nullptr); 85 86 // Create an iterator over the result of the above dataset 87 // This will trigger the creation of the Execution Tree and launch it. 88 std::shared_ptr<Iterator> iter_minddata = ds_minddata->CreateIterator(); 89 EXPECT_NE(iter_minddata, nullptr); 90 91 // Iterate the dataset and get each row 92 std::unordered_map<std::string, mindspore::MSTensor> row_minddata; 93 ASSERT_OK(iter_minddata->GetNextRow(&row_minddata)); 94 95 // Check column name for each row 96 EXPECT_NE(row_minddata.find("image"), row_minddata.end()); 97 EXPECT_NE(row_minddata.find("label"), row_minddata.end()); 98 99 // Expect the output data is same with original_data 100 uint64_t j = 0; 101 while (row_minddata.size() != 0) { 102 auto label = row_minddata["label"]; 103 EXPECT_MSTENSOR_EQ(original_data[j], label); 104 TEST_MS_LOG_MSTENSOR(INFO, "Tensor label: ", label); 105 106 ASSERT_OK(iter_minddata->GetNextRow(&row_minddata)); 107 j++; 108 } 109 110 // Expect 10 samples 111 EXPECT_EQ(j, 10); 112 // Manually terminate the pipeline 113 iter_minddata->Stop(); 114 115 // Delete temp file 116 EXPECT_EQ(remove(temp_file.c_str()), 0); 117 EXPECT_EQ(remove(temp_file_db.c_str()), 0); 118 } 119 120 TEST_F(MindDataTestPipeline, TestSaveFail) { 121 MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSaveFail with incorrect param."; 122 123 // Create a Cifar10 Dataset 124 std::string folder_path = datasets_root_path_ + "/testCifar10Data/"; 125 std::shared_ptr<Dataset> ds = Cifar10(folder_path, "all", std::make_shared<SequentialSampler>(0, 10)); 126 EXPECT_NE(ds, nullptr); 127 128 // fail with invalid dataset_path 129 std::string temp_file1 = ""; 130 bool rc1 = ds->Save(temp_file1); 131 EXPECT_EQ(rc1, false); 132 133 // fail with invalid dataset_path 134 std::string temp_file2 = datasets_root_path_ + "/testCifar10Data/"; 135 bool rc2 = ds->Save(temp_file2); 136 EXPECT_EQ(rc2, false); 137 138 // fail with invalid num_files 139 std::string temp_file3 = datasets_root_path_ + "/testCifar10Data/mind.mind"; 140 bool rc3 = ds->Save(temp_file3, 0); 141 EXPECT_EQ(rc3, false); 142 143 // fail with invalid num_files 144 std::string temp_file4 = datasets_root_path_ + "/testCifar10Data/mind.mind"; 145 bool rc4 = ds->Save(temp_file4, 1001); 146 EXPECT_EQ(rc4, false); 147 148 // fail with invalid dataset_type 149 std::string temp_file5 = datasets_root_path_ + "/testCifar10Data/mind.mind"; 150 bool rc5 = ds->Save(temp_file5, 5, "tfrecord"); 151 EXPECT_EQ(rc5, false); 152 } 153