1 /**
2 * Copyright 2020-2021 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include <stdio.h>
17 #include "common/common.h"
18 #include "minddata/dataset/include/dataset/datasets.h"
19 #include "minddata/dataset/include/dataset/transforms.h"
20
21 using namespace mindspore::dataset;
22
23 class MindDataTestPipeline : public UT::DatasetOpTesting {
24 protected:
25 };
26
/// Feature: Dataset::Save
/// Description: Reads 10 Cifar10 samples, saves the pipeline output to a single file, reloads
///   that file through MindData and compares every reloaded label with the original one.
/// Expectation: Both passes yield 10 rows, the reloaded rows carry "image" and "label" columns,
///   and the labels match after being cast back to uint32.
TEST_F(MindDataTestPipeline, TestSaveCifar10AndLoad) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSaveCifar10AndLoad(single mindrecord file).";

  // Stage 1: load original dataset
  // Create a Cifar10 Dataset
  std::string folder_path = datasets_root_path_ + "/testCifar10Data/";
  std::shared_ptr<Dataset> ds = Cifar10(folder_path, "all", std::make_shared<SequentialSampler>(0, 10));
  // ds is dereferenced below, so a null result must abort this test rather than crash the binary.
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  std::vector<mindspore::MSTensor> original_data;
  ASSERT_OK(iter->GetNextRow(&row));

  // Save original labels for comparison; an empty row marks the end of the data
  uint64_t i = 0;
  while (row.size() != 0) {
    auto label = row["label"];
    original_data.push_back(label);
    TEST_MS_LOG_MSTENSOR(INFO, "Tensor label: ", label);

    ASSERT_OK(iter->GetNextRow(&row));
    i++;
  }

  // Expect 10 samples
  EXPECT_EQ(i, 10);
  // Manually terminate the pipeline
  iter->Stop();

  // Stage 2: Save data processed by the dataset pipeline
  std::string temp_file = datasets_root_path_ + "/testCifar10Data/mind.mind";
  std::string temp_file_db = datasets_root_path_ + "/testCifar10Data/mind.mind.db";
  // An earlier run that aborted part-way leaves these files behind, and Save could then fail
  // because the target already exists. Remove any leftovers first; the return value is ignored
  // on purpose because on a clean tree there is nothing to delete.
  (void)remove(temp_file.c_str());
  (void)remove(temp_file_db.c_str());

  bool rc = ds->Save(temp_file);
  // if save fails, no need to continue the execution
  ASSERT_TRUE(rc);

  // Stage 3: Load dataset from file output by stage 2
  // Create a MindData Dataset
  std::shared_ptr<Dataset> ds_minddata = MindData(temp_file, {}, std::make_shared<SequentialSampler>(0, 10));
  ASSERT_NE(ds_minddata, nullptr);

  // Create objects for the tensor ops
  // uint32 will be casted to int64 implicitly in mindrecord file, so we have to cast it back to uint32
  std::shared_ptr<TensorTransform> type_cast =
    std::make_shared<transforms::TypeCast>(mindspore::DataType::kNumberTypeUInt32);
  EXPECT_NE(type_cast, nullptr);

  // Create a Map operation on ds
  ds_minddata = ds_minddata->Map({type_cast}, {"label"});
  ASSERT_NE(ds_minddata, nullptr);

  // Create an iterator over the result of the above dataset
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter_minddata = ds_minddata->CreateIterator();
  ASSERT_NE(iter_minddata, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row_minddata;
  ASSERT_OK(iter_minddata->GetNextRow(&row_minddata));

  // Check column names on the first row
  EXPECT_NE(row_minddata.find("image"), row_minddata.end());
  EXPECT_NE(row_minddata.find("label"), row_minddata.end());

  // Expect the output data is same with original_data
  uint64_t j = 0;
  while (row_minddata.size() != 0) {
    // Guard the index: a reload that yields more rows than were saved must fail the test,
    // not read past the end of original_data.
    ASSERT_LT(j, original_data.size());
    auto label = row_minddata["label"];
    EXPECT_MSTENSOR_EQ(original_data[j], label);
    TEST_MS_LOG_MSTENSOR(INFO, "Tensor label: ", label);

    ASSERT_OK(iter_minddata->GetNextRow(&row_minddata));
    j++;
  }

  // Expect 10 samples
  EXPECT_EQ(j, 10);
  // Manually terminate the pipeline
  iter_minddata->Stop();

  // Delete temp files; both must exist at this point, so removal is expected to succeed
  EXPECT_EQ(remove(temp_file.c_str()), 0);
  EXPECT_EQ(remove(temp_file_db.c_str()), 0);
}
119
/// Feature: Dataset::Save
/// Description: Calls Save on a Cifar10 pipeline with an invalid path, an invalid file count
///   and an invalid dataset type.
/// Expectation: Every call returns false.
TEST_F(MindDataTestPipeline, TestSaveFail) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestSaveFail with incorrect param.";

  // Create a Cifar10 Dataset
  std::string folder_path = datasets_root_path_ + "/testCifar10Data/";
  std::shared_ptr<Dataset> ds = Cifar10(folder_path, "all", std::make_shared<SequentialSampler>(0, 10));
  // ds is dereferenced by every check below, so a null result must abort the test here.
  ASSERT_NE(ds, nullptr);

  // Path shared by the cases that exercise num_files / dataset_type
  std::string record_path = datasets_root_path_ + "/testCifar10Data/mind.mind";

  // fail with invalid dataset_path: empty string
  std::string empty_path = "";
  EXPECT_FALSE(ds->Save(empty_path));

  // fail with invalid dataset_path: path ends with a separator, i.e. names a directory
  std::string directory_path = datasets_root_path_ + "/testCifar10Data/";
  EXPECT_FALSE(ds->Save(directory_path));

  // fail with invalid num_files: 0 (presumably below the accepted minimum -- confirm against Save)
  EXPECT_FALSE(ds->Save(record_path, 0));

  // fail with invalid num_files: 1001 (presumably above the accepted maximum -- confirm against Save)
  EXPECT_FALSE(ds->Save(record_path, 1001));

  // fail with invalid dataset_type: "tfrecord" is rejected as an output format
  EXPECT_FALSE(ds->Save(record_path, 5, "tfrecord"));
}
153