1 /**
2 * Copyright 2020-2021 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "common/common.h"
18 #include "minddata/dataset/include/dataset/config.h"
19 #include "minddata/dataset/include/dataset/datasets.h"
20
21 using namespace mindspore::dataset;
22 using mindspore::dataset::ShuffleMode;
23 using mindspore::dataset::Tensor;
24
25 class MindDataTestPipeline : public UT::DatasetOpTesting {
26 protected:
27 };
28
// Feature: dataset configuration load / set / get
// Description: load a config file, check the values reported by the getters,
//              override each value through its setter and check the getters again
// Expectation: the load and every setter report success, and the getters return
//              the most recently applied values
TEST_F(MindDataTestPipeline, TestConfigSetting) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConfigSetting.";

  // Snapshot the configuration in effect on entry so it can be put back at the end.
  const auto saved_workers = config::get_num_parallel_workers();
  const auto saved_prefetch = config::get_prefetch_size();
  const auto saved_seed = config::get_seed();
  const auto saved_interval = config::get_monitor_sampling_interval();

  // Loading the config file is expected to succeed.
  const std::string cfg_path = datasets_root_path_ + "/declient.cfg";
  EXPECT_EQ(config::load(cfg_path), true);

  // Values the test expects the getters to report right after the load.
  EXPECT_EQ(config::get_num_parallel_workers(), 8);
  EXPECT_EQ(config::get_prefetch_size(), 16);
  EXPECT_EQ(config::get_seed(), 5489);
  EXPECT_EQ(config::get_monitor_sampling_interval(), 15);

  // Override every value; each setter is expected to report success.
  EXPECT_EQ(config::set_num_parallel_workers(2), true);
  EXPECT_EQ(config::set_prefetch_size(4), true);
  EXPECT_EQ(config::set_seed(5), true);
  EXPECT_EQ(config::set_monitor_sampling_interval(45), true);

  // The getters must now reflect the overrides.
  EXPECT_EQ(config::get_num_parallel_workers(), 2);
  EXPECT_EQ(config::get_prefetch_size(), 4);
  EXPECT_EQ(config::get_seed(), 5);
  EXPECT_EQ(config::get_monitor_sampling_interval(), 45);

  // Put the entry-time configuration back so later tests are unaffected.
  config::set_num_parallel_workers(saved_workers);
  config::set_prefetch_size(saved_prefetch);
  config::set_seed(saved_seed);
  config::set_monitor_sampling_interval(saved_interval);
}
72
// Feature: dataset configuration parameter validation
// Description: load a config file from a path that does not exist and call each
//              setter with a value the test treats as invalid
// Expectation: the load and every setter report failure
TEST_F(MindDataTestPipeline, TestConfigParamCheck) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConfigParamCheck.";

  // Snapshot the configuration in effect on entry so it can be put back at the end.
  const auto saved_workers = config::get_num_parallel_workers();
  const auto saved_prefetch = config::get_prefetch_size();
  const auto saved_seed = config::get_seed();
  const auto saved_interval = config::get_monitor_sampling_interval();

  // A non-existent config file must be rejected.
  const std::string missing_cfg_path = datasets_root_path_ + "/not_exist.cfg";
  EXPECT_EQ(config::load(missing_cfg_path), false);

  // Each of these arguments is expected to be refused by its setter.
  EXPECT_EQ(config::set_num_parallel_workers(0), false);
  EXPECT_EQ(config::set_prefetch_size(0), false);
  EXPECT_EQ(config::set_seed(-1), false);
  EXPECT_EQ(config::set_monitor_sampling_interval(0), false);

  // Put the entry-time configuration back so later tests are unaffected.
  config::set_num_parallel_workers(saved_workers);
  config::set_prefetch_size(saved_prefetch);
  config::set_seed(saved_seed);
  config::set_monitor_sampling_interval(saved_interval);
}
104
// Feature: deterministic shuffle driven by the global seed
// Description: fix the seed and the number of parallel workers, shuffle a
//              three-line TextFile dataset with buffer_size=3 and read it back
// Expectation: exactly three rows are produced, in the order listed in
//              expected_result
TEST_F(MindDataTestPipeline, TestShuffleWithSeed) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestShuffleWithSeed.";
  // Test deterministic shuffle with setting the seed

  // Save the seed and worker count that are in effect on entry
  uint32_t original_seed = config::get_seed();
  uint32_t original_num_parallel_workers = config::get_num_parallel_workers();
  MS_LOG(DEBUG) << "ORIGINAL seed: " << original_seed << ", num_parallel_workers: " << original_num_parallel_workers;

  // Scope guard: restores the saved configuration on every exit path. The fatal
  // ASSERT_* macros below return from the test body early, which would otherwise
  // skip the restore and leak seed=654 / workers=1 into subsequent tests.
  struct ConfigRestorer {
    uint32_t seed;
    uint32_t num_parallel_workers;
    ~ConfigRestorer() {
      config::set_seed(seed);
      config::set_num_parallel_workers(num_parallel_workers);
    }
  } restore_config{original_seed, original_num_parallel_workers};

  config::set_seed(654);
  config::set_num_parallel_workers(1);

  // Create a TextFile Dataset with single text file which has three samples
  std::string text_file = datasets_root_path_ + "/testTextFileDataset/1.txt";
  std::shared_ptr<Dataset> ds = TextFile({text_file}, 0, ShuffleMode::kFalse);
  // Fatal check: ds is dereferenced on the next statement
  ASSERT_NE(ds, nullptr);

  // Shuffle the dataset with buffer_size=3
  ds = ds->Shuffle(3);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset.
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: iter is dereferenced immediately below
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));
  EXPECT_NE(row.find("text"), row.end());

  const std::vector<std::string> expected_result = {"Good luck to everyone.", "Be happy every day.",
                                                    "This is a text file."};

  uint64_t i = 0;
  while (row.size() != 0) {
    // Fail cleanly instead of indexing past the end of expected_result when the
    // pipeline yields more rows than expected
    ASSERT_LT(i, expected_result.size());

    auto text = row["text"];

    std::shared_ptr<Tensor> de_text;
    ASSERT_OK(Tensor::CreateFromMSTensor(text, &de_text));
    std::string_view sv;
    ASSERT_OK(de_text->GetItemAt(&sv, {}));
    std::string ss(sv);
    MS_LOG(INFO) << "Text length: " << ss.length() << ", Text: " << ss.substr(0, 50);
    // Compare against expected result
    EXPECT_STREQ(ss.c_str(), expected_result[i].c_str());

    i++;
    ASSERT_OK(iter->GetNextRow(&row));
  }

  // Expect 3 samples
  EXPECT_EQ(i, 3);

  // Manually terminate the pipeline
  iter->Stop();

  // Configuration is restored by restore_config when it goes out of scope
}
164
// Feature: shuffle combined with repeat under a fixed seed
// Description: fix the seed and the number of parallel workers, shuffle a
//              three-line TextFile dataset with buffer_size=3, repeat it twice
//              and read all rows back
// Expectation: six rows are produced and each row of the second pass differs
//              from the row at the same position in the first pass
TEST_F(MindDataTestPipeline, TestCallShuffleTwice) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCallShuffleTwice.";
  // Test shuffle and repeat with setting the seed.
  // The second copy will be different from the first one because results will be different when calling shuffle twice.

  // Save the seed and worker count that are in effect on entry
  uint32_t original_seed = config::get_seed();
  uint32_t original_num_parallel_workers = config::get_num_parallel_workers();
  MS_LOG(DEBUG) << "ORIGINAL seed: " << original_seed << ", num_parallel_workers: " << original_num_parallel_workers;

  // Scope guard: restores the saved configuration on every exit path. The fatal
  // ASSERT_* macros below return from the test body early, which would otherwise
  // skip the restore and leak seed=654 / workers=1 into subsequent tests.
  struct ConfigRestorer {
    uint32_t seed;
    uint32_t num_parallel_workers;
    ~ConfigRestorer() {
      config::set_seed(seed);
      config::set_num_parallel_workers(num_parallel_workers);
    }
  } restore_config{original_seed, original_num_parallel_workers};

  config::set_seed(654);
  config::set_num_parallel_workers(1);

  // Create a TextFile Dataset with single text file which has three samples
  std::string text_file = datasets_root_path_ + "/testTextFileDataset/1.txt";
  std::shared_ptr<Dataset> ds = TextFile({text_file}, 0, ShuffleMode::kFalse);
  // Fatal check: ds is dereferenced on the next statement
  ASSERT_NE(ds, nullptr);

  // Shuffle the dataset with buffer_size=3
  ds = ds->Shuffle(3);
  ASSERT_NE(ds, nullptr);

  // Repeat the dataset twice
  ds = ds->Repeat(2);
  ASSERT_NE(ds, nullptr);

  // Create an iterator over the result of the above dataset.
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  // Fatal check: iter is dereferenced immediately below
  ASSERT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, mindspore::MSTensor> row;
  ASSERT_OK(iter->GetNextRow(&row));
  EXPECT_NE(row.find("text"), row.end());

  std::vector<std::string> first_copy;
  std::vector<std::string> second_copy;

  uint64_t i = 0;
  while (row.size() != 0) {
    auto text = row["text"];
    std::shared_ptr<Tensor> de_text;
    ASSERT_OK(Tensor::CreateFromMSTensor(text, &de_text));
    std::string_view sv;
    ASSERT_OK(de_text->GetItemAt(&sv, {}));
    std::string ss(sv);
    MS_LOG(INFO) << "Text length: " << ss.length() << ", Text: " << ss.substr(0, 50);

    // The first three samples are the first copy and the rest are the second
    if (i < 3) {
      first_copy.push_back(ss);
    } else {
      second_copy.push_back(ss);
    }

    i++;
    ASSERT_OK(iter->GetNextRow(&row));
  }

  // Expect 6 samples
  EXPECT_EQ(i, 6);

  // Manually terminate the pipeline. The collected strings are owned copies, so
  // stopping before the comparisons is safe and guarantees Stop() is reached
  // even when a size check below aborts the test.
  iter->Stop();

  // Both copies must hold exactly three rows before they are indexed; otherwise
  // .at() would throw std::out_of_range instead of reporting a test failure.
  ASSERT_EQ(first_copy.size(), 3U);
  ASSERT_EQ(second_copy.size(), 3U);

  // Compare the two copies, which are expected to differ position by position
  for (size_t j = 0; j < first_copy.size(); j++) {
    EXPECT_STRNE(first_copy.at(j).c_str(), second_copy.at(j).c_str());
  }

  // Configuration is restored by restore_config when it goes out of scope
}
239