• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020-2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "common/common.h"
18 #include "minddata/dataset/include/dataset/config.h"
19 #include "minddata/dataset/include/dataset/datasets.h"
20 
21 using namespace mindspore::dataset;
22 using mindspore::dataset::ShuffleMode;
23 using mindspore::dataset::Tensor;
24 
25 class MindDataTestPipeline : public UT::DatasetOpTesting {
26  protected:
27 };
28 
// Loads the sample configuration file and checks the values the getters report afterwards,
// then overrides each setting through the config setters and checks that the getters
// report the new values. The configuration found on entry is put back before returning.
TEST_F(MindDataTestPipeline, TestConfigSetting) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConfigSetting.";

  // Snapshot the current configuration so it can be reinstated at the end.
  const auto saved_workers = config::get_num_parallel_workers();
  const auto saved_prefetch = config::get_prefetch_size();
  const auto saved_seed = config::get_seed();
  const auto saved_interval = config::get_monitor_sampling_interval();

  // Loading the sample configuration file is expected to succeed.
  std::string cfg_path = datasets_root_path_ + "/declient.cfg";
  const auto loaded = config::load(cfg_path);
  EXPECT_EQ(loaded, true);

  // After the load, the getters must report the values this test expects from that file.
  EXPECT_EQ(config::get_num_parallel_workers(), 8);
  EXPECT_EQ(config::get_prefetch_size(), 16);
  EXPECT_EQ(config::get_seed(), 5489);
  EXPECT_EQ(config::get_monitor_sampling_interval(), 15);

  // Override every setting; each setter is expected to report success.
  const auto workers_ok = config::set_num_parallel_workers(2);
  const auto prefetch_ok = config::set_prefetch_size(4);
  const auto seed_ok = config::set_seed(5);
  const auto interval_ok = config::set_monitor_sampling_interval(45);
  EXPECT_EQ(workers_ok, true);
  EXPECT_EQ(prefetch_ok, true);
  EXPECT_EQ(seed_ok, true);
  EXPECT_EQ(interval_ok, true);

  // The overrides must be visible through the getters.
  EXPECT_EQ(config::get_num_parallel_workers(), 2);
  EXPECT_EQ(config::get_prefetch_size(), 4);
  EXPECT_EQ(config::get_seed(), 5);
  EXPECT_EQ(config::get_monitor_sampling_interval(), 45);

  // Reinstate the configuration captured on entry.
  config::set_num_parallel_workers(saved_workers);
  config::set_prefetch_size(saved_prefetch);
  config::set_seed(saved_seed);
  config::set_monitor_sampling_interval(saved_interval);
}
72 
// Exercises the failure paths of the config API: loading from a path the test names
// "not_exist.cfg" and calling each setter with a value the test expects to be rejected.
// The configuration found on entry is put back before returning.
TEST_F(MindDataTestPipeline, TestConfigParamCheck) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestConfigParamCheck.";

  // Snapshot the current configuration so it can be reinstated at the end.
  const auto saved_workers = config::get_num_parallel_workers();
  const auto saved_prefetch = config::get_prefetch_size();
  const auto saved_seed = config::get_seed();
  const auto saved_interval = config::get_monitor_sampling_interval();

  // Loading from the wrong path is expected to report failure.
  std::string cfg_path = datasets_root_path_ + "/not_exist.cfg";
  const auto loaded = config::load(cfg_path);
  EXPECT_EQ(loaded, false);

  // Each setter is called with a wrong parameter and is expected to report failure.
  const auto workers_ok = config::set_num_parallel_workers(0);
  const auto prefetch_ok = config::set_prefetch_size(0);
  const auto seed_ok = config::set_seed(-1);
  const auto interval_ok = config::set_monitor_sampling_interval(0);
  EXPECT_EQ(workers_ok, false);
  EXPECT_EQ(prefetch_ok, false);
  EXPECT_EQ(seed_ok, false);
  EXPECT_EQ(interval_ok, false);

  // Reinstate the configuration captured on entry.
  config::set_num_parallel_workers(saved_workers);
  config::set_prefetch_size(saved_prefetch);
  config::set_seed(saved_seed);
  config::set_monitor_sampling_interval(saved_interval);
}
104 
TEST_F(MindDataTestPipeline,TestShuffleWithSeed)105 TEST_F(MindDataTestPipeline, TestShuffleWithSeed) {
106   MS_LOG(INFO) << "Doing MindDataTestPipeline-TestShuffleWithSeed.";
107   // Test deterministic shuffle with setting the seed
108 
109   // Save and set the seed
110   uint32_t original_seed = config::get_seed();
111   uint32_t original_num_parallel_workers = config::get_num_parallel_workers();
112   MS_LOG(DEBUG) << "ORIGINAL seed: " << original_seed << ", num_parallel_workers: " << original_num_parallel_workers;
113   config::set_seed(654);
114   config::set_num_parallel_workers(1);
115 
116   // Create a TextFile Dataset with single text file which has three samples
117   std::string text_file = datasets_root_path_ + "/testTextFileDataset/1.txt";
118   std::shared_ptr<Dataset> ds = TextFile({text_file}, 0, ShuffleMode::kFalse);
119   EXPECT_NE(ds, nullptr);
120 
121   // Shuffle the dataset with buffer_size=3
122   ds = ds->Shuffle(3);
123   EXPECT_NE(ds, nullptr);
124 
125   // Create an iterator over the result of the above dataset.
126   // This will trigger the creation of the Execution Tree and launch it.
127   std::shared_ptr<Iterator> iter = ds->CreateIterator();
128   EXPECT_NE(iter, nullptr);
129 
130   // Iterate the dataset and get each row
131   std::unordered_map<std::string, mindspore::MSTensor> row;
132   ASSERT_OK(iter->GetNextRow(&row));
133   EXPECT_NE(row.find("text"), row.end());
134 
135   std::vector<std::string> expected_result = {"Good luck to everyone.", "Be happy every day.", "This is a text file."};
136 
137   uint64_t i = 0;
138   while (row.size() != 0) {
139     auto text = row["text"];
140 
141     std::shared_ptr<Tensor> de_text;
142     ASSERT_OK(Tensor::CreateFromMSTensor(text, &de_text));
143     std::string_view sv;
144     ASSERT_OK(de_text->GetItemAt(&sv, {}));
145     std::string ss(sv);
146     MS_LOG(INFO) << "Text length: " << ss.length() << ", Text: " << ss.substr(0, 50);
147     // Compare against expected result
148     EXPECT_STREQ(ss.c_str(), expected_result[i].c_str());
149 
150     i++;
151     ASSERT_OK(iter->GetNextRow(&row));
152   }
153 
154   // Expect 3 samples
155   EXPECT_EQ(i, 3);
156 
157   // Manually terminate the pipeline
158   iter->Stop();
159 
160   // Restore configuration
161   config::set_seed(original_seed);
162   config::set_num_parallel_workers(original_num_parallel_workers);
163 }
164 
TEST_F(MindDataTestPipeline,TestCallShuffleTwice)165 TEST_F(MindDataTestPipeline, TestCallShuffleTwice) {
166   MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCallShuffleTwice.";
167   // Test shuffle and repeat with setting the seed.
168   // The second copy will be different from the first one because results will be different when calling shuffle twice.
169 
170   // Save and set the seed
171   uint32_t original_seed = config::get_seed();
172   uint32_t original_num_parallel_workers = config::get_num_parallel_workers();
173   MS_LOG(DEBUG) << "ORIGINAL seed: " << original_seed << ", num_parallel_workers: " << original_num_parallel_workers;
174   config::set_seed(654);
175   config::set_num_parallel_workers(1);
176 
177   // Create a TextFile Dataset with single text file which has three samples
178   std::string text_file = datasets_root_path_ + "/testTextFileDataset/1.txt";
179   std::shared_ptr<Dataset> ds = TextFile({text_file}, 0, ShuffleMode::kFalse);
180   EXPECT_NE(ds, nullptr);
181 
182   // Shuffle the dataset with buffer_size=3
183   ds = ds->Shuffle(3);
184   EXPECT_NE(ds, nullptr);
185 
186   // Repeat the dataset twice
187   ds = ds->Repeat(2);
188   EXPECT_NE(ds, nullptr);
189 
190   // Create an iterator over the result of the above dataset.
191   // This will trigger the creation of the Execution Tree and launch it.
192   std::shared_ptr<Iterator> iter = ds->CreateIterator();
193   EXPECT_NE(iter, nullptr);
194 
195   // Iterate the dataset and get each row
196   std::unordered_map<std::string, mindspore::MSTensor> row;
197   ASSERT_OK(iter->GetNextRow(&row));
198   EXPECT_NE(row.find("text"), row.end());
199 
200   std::vector<std::string> first_copy;
201   std::vector<std::string> second_copy;
202 
203   uint64_t i = 0;
204   while (row.size() != 0) {
205     auto text = row["text"];
206     std::shared_ptr<Tensor> de_text;
207     ASSERT_OK(Tensor::CreateFromMSTensor(text, &de_text));
208     std::string_view sv;
209     ASSERT_OK(de_text->GetItemAt(&sv, {}));
210     std::string ss(sv);
211     MS_LOG(INFO) << "Text length: " << ss.length() << ", Text: " << ss.substr(0, 50);
212 
213     // The first three samples are the first copy and the rest are the second
214     if (i < 3) {
215       first_copy.push_back(ss);
216     } else {
217       second_copy.push_back(ss);
218     }
219 
220     i++;
221     ASSERT_OK(iter->GetNextRow(&row));
222   }
223 
224   // Expect 6 samples
225   EXPECT_EQ(i, 6);
226 
227   // Compare the two copies which are deterministic difference
228   for (int j = 0; j < 3; j++) {
229     EXPECT_STRNE(first_copy.at(j).c_str(), second_copy.at(j).c_str());
230   }
231 
232   // Manually terminate the pipeline
233   iter->Stop();
234 
235   // Restore configuration
236   config::set_seed(original_seed);
237   config::set_num_parallel_workers(original_num_parallel_workers);
238 }
239