1 /** 2 * Copyright 2019-2021 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include <memory> 17 #include <string> 18 #include "minddata/dataset/core/client.h" 19 // #include "minddata/dataset/core/pybind_support.h" 20 // #include "minddata/dataset/core/tensor.h" 21 // #include "minddata/dataset/core/tensor_shape.h" 22 // #include "minddata/dataset/engine/datasetops/batch_op.h" 23 #include "minddata/dataset/engine/datasetops/source/tf_reader_op.h" 24 #include "common/common.h" 25 #include "gtest/gtest.h" 26 #include "utils/log_adapter.h" 27 #include "securec.h" 28 #include "minddata/dataset/util/status.h" 29 // #include "pybind11/numpy.h" 30 // #include "pybind11/pybind11.h" 31 32 // #include "utils/ms_utils.h" 33 34 // #include "minddata/dataset/engine/db_connector.h" 35 // #include "minddata/dataset/kernels/data/data_utils.h" 36 37 namespace common = mindspore::common; 38 namespace de = mindspore::dataset; 39 40 using namespace mindspore::dataset; 41 using mindspore::LogStream; 42 using mindspore::ExceptionType::NoExceptionType; 43 using mindspore::MsLogLevel::ERROR; 44 45 class MindDataTestBatchOp : public UT::DatasetOpTesting { 46 protected: 47 }; 48 49 TEST_F(MindDataTestBatchOp, TestSimpleBatch) { 50 std::string schema_file = datasets_root_path_ + "/testBatchDataset/test.data"; 51 bool success = false; 52 const std::shared_ptr<de::BatchOp> &op = Batch(12); 53 EXPECT_EQ(op->Name(), "BatchOp"); 54 55 auto tree = Build({TFReader(schema_file), op}); 56 tree->Prepare(); 57 Status rc = tree->Launch(); 58 if (rc.IsError()) { 59 MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << "."; 60 } else { 61 int64_t payload[] = {-9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807}; 62 de::DatasetIterator di(tree); 63 TensorMap tensor_map; 64 rc = di.GetNextAsMap(&tensor_map); 65 EXPECT_TRUE(rc.IsOk()); 66 std::shared_ptr<de::Tensor> t; 67 rc = de::Tensor::CreateFromMemory(de::TensorShape({12, 1}), de::DataType(DataType::DE_INT64), 68 (unsigned char *)payload, &t); 69 EXPECT_TRUE(rc.IsOk()); 70 // verify the actual data in Tensor is correct 71 EXPECT_EQ(*t == *tensor_map["col_sint64"], true); 72 // change what's in Tensor and verify this time the data is incorrect1; 73 EXPECT_EQ(*t == *tensor_map["col_sint16"], false); 74 rc = di.GetNextAsMap(&tensor_map); 75 EXPECT_TRUE(rc.IsOk()); 76 if (tensor_map.size() == 0) { 77 success = true; 78 } 79 } 80 EXPECT_EQ(success, true); 81 } 82 83 TEST_F(MindDataTestBatchOp, TestRepeatBatchDropTrue) { 84 std::string schema_file = datasets_root_path_ + "/testBatchDataset/test.data"; 85 bool success = false; 86 auto op1 = TFReader(schema_file); 87 auto op2 = Repeat(2); 88 auto op3 = Batch(7, true); 89 op1->SetTotalRepeats(2); 90 op1->SetNumRepeatsPerEpoch(2); 91 auto tree = Build({op1, op2, op3}); 92 tree->Prepare(); 93 Status rc = tree->Launch(); 94 if (rc.IsError()) { 95 MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << "."; 96 } else { 97 int64_t payload[] = {-9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807, 98 -9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807}; 99 de::DatasetIterator di(tree); 100 std::shared_ptr<de::Tensor> t1, t2, t3; 101 rc = de::Tensor::CreateFromMemory(de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64), 102 (unsigned char *)payload, &t1); 103 EXPECT_TRUE(rc.IsOk()); 104 rc = de::Tensor::CreateFromMemory(de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64), 105 (unsigned char *)(payload + 7), &t2); 106 EXPECT_TRUE(rc.IsOk()); 107 rc = de::Tensor::CreateFromMemory(de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64), 108 (unsigned char *)(payload + 2), &t3); 109 EXPECT_TRUE(rc.IsOk()); 110 111 TensorMap tensor_map; 112 rc = di.GetNextAsMap(&tensor_map); 113 EXPECT_TRUE(rc.IsOk()); 114 EXPECT_EQ(*t1 == *(tensor_map["col_sint64"]), true); // first call to getNext() 115 116 rc = di.GetNextAsMap(&tensor_map); 117 EXPECT_TRUE(rc.IsOk()); 118 EXPECT_EQ(*t2 == *(tensor_map["col_sint64"]), true); // second call to getNext() 119 120 rc = di.GetNextAsMap(&tensor_map); 121 EXPECT_TRUE(rc.IsOk()); 122 EXPECT_EQ(*t3 == *(tensor_map["col_sint64"]), true); // third call to getNext() 123 124 rc = di.GetNextAsMap(&tensor_map); 125 EXPECT_TRUE(rc.IsOk()); 126 if (tensor_map.size() == 0) { 127 success = true; 128 } 129 } 130 EXPECT_EQ(success, true); 131 } 132 133 TEST_F(MindDataTestBatchOp, TestRepeatBatchDropFalse) { 134 std::string schema_file = datasets_root_path_ + "/testBatchDataset/test.data"; 135 bool success = false; 136 auto op1 = TFReader(schema_file); 137 auto op2 = Repeat(2); 138 auto op3 = Batch(7, false); 139 op1->SetTotalRepeats(2); 140 op1->SetNumRepeatsPerEpoch(2); 141 auto tree = Build({op1, op2, op3}); 142 tree->Prepare(); 143 Status rc = tree->Launch(); 144 if (rc.IsError()) { 145 MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << "."; 146 } else { 147 int64_t payload[] = {-9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807, 148 -9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807}; 149 de::DatasetIterator di(tree); 150 std::shared_ptr<de::Tensor> t1, t2, t3, t4; 151 rc = de::Tensor::CreateFromMemory(de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64), 152 (unsigned char *)payload, &t1); 153 EXPECT_TRUE(rc.IsOk()); 154 rc = de::Tensor::CreateFromMemory(de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64), 155 (unsigned char *)(payload + 7), &t2); 156 EXPECT_TRUE(rc.IsOk()); 157 rc = de::Tensor::CreateFromMemory(de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64), 158 (unsigned char *)(payload + 2), &t3); 159 EXPECT_TRUE(rc.IsOk()); 160 rc = de::Tensor::CreateFromMemory(de::TensorShape({3, 1}), de::DataType(DataType::DE_INT64), 161 (unsigned char *)(payload + 9), &t4); 162 EXPECT_TRUE(rc.IsOk()); 163 164 TensorMap tensor_map; 165 rc = di.GetNextAsMap(&tensor_map); 166 EXPECT_TRUE(rc.IsOk()); 167 EXPECT_EQ(*t1 == *(tensor_map["col_sint64"]), true); // first call to getNext() 168 169 rc = di.GetNextAsMap(&tensor_map); 170 EXPECT_TRUE(rc.IsOk()); 171 EXPECT_EQ(*t2 == *(tensor_map["col_sint64"]), true); // second call to getNext() 172 173 rc = di.GetNextAsMap(&tensor_map); 174 EXPECT_TRUE(rc.IsOk()); 175 EXPECT_EQ(*t3 == *(tensor_map["col_sint64"]), true); // third call to getNext() 176 177 rc = di.GetNextAsMap(&tensor_map); 178 EXPECT_TRUE(rc.IsOk()); 179 EXPECT_EQ(*t4 == *(tensor_map["col_sint64"]), true); // last call to getNext() 180 181 rc = di.GetNextAsMap(&tensor_map); 182 EXPECT_TRUE(rc.IsOk()); 183 if (tensor_map.size() == 0) { 184 success = true; 185 } 186 } 187 EXPECT_EQ(success, true); 188 } 189 190 TEST_F(MindDataTestBatchOp, TestBatchDropFalseRepeat) { 191 std::string schema_file = datasets_root_path_ + "/testBatchDataset/test.data"; 192 bool success = false; 193 auto op1 = TFReader(schema_file); 194 auto op2 = Batch(7, false); 195 auto op3 = Repeat(2); 196 op1->SetTotalRepeats(2); 197 op1->SetNumRepeatsPerEpoch(2); 198 op2->SetTotalRepeats(2); 199 op2->SetNumRepeatsPerEpoch(2); 200 auto tree = Build({op1, op2, op3}); 201 tree->Prepare(); 202 Status rc = tree->Launch(); 203 if (rc.IsError()) { 204 MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << "."; 205 } else { 206 int64_t payload[] = {-9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807, 207 -9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807}; 208 de::DatasetIterator di(tree); 209 std::shared_ptr<de::Tensor> t1, t2; 210 rc = de::Tensor::CreateFromMemory(de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64), 211 (unsigned char *)payload, &t1); 212 EXPECT_TRUE(rc.IsOk()); 213 rc = de::Tensor::CreateFromMemory(de::TensorShape({5, 1}), de::DataType(DataType::DE_INT64), 214 (unsigned char *)(payload + 7), &t2); 215 EXPECT_TRUE(rc.IsOk()); 216 217 TensorMap tensor_map; 218 rc = di.GetNextAsMap(&tensor_map); 219 EXPECT_TRUE(rc.IsOk()); 220 EXPECT_EQ(*t1 == *(tensor_map["col_sint64"]), true); // first call to getNext() 221 222 rc = di.GetNextAsMap(&tensor_map); 223 EXPECT_TRUE(rc.IsOk()); 224 EXPECT_EQ(*t2 == *(tensor_map["col_sint64"]), true); // second call to getNext() 225 226 rc = di.GetNextAsMap(&tensor_map); 227 EXPECT_TRUE(rc.IsOk()); 228 EXPECT_EQ(*t1 == *(tensor_map["col_sint64"]), true); // third call to getNext() 229 230 rc = di.GetNextAsMap(&tensor_map); 231 EXPECT_TRUE(rc.IsOk()); 232 EXPECT_EQ(*t2 == *(tensor_map["col_sint64"]), true); // last call to getNext() 233 234 rc = di.GetNextAsMap(&tensor_map); 235 EXPECT_TRUE(rc.IsOk()); 236 if (tensor_map.size() == 0) { 237 success = true; 238 } 239 } 240 EXPECT_EQ(success, true); 241 } 242 243 TEST_F(MindDataTestBatchOp, TestBatchDropTrueRepeat) { 244 std::string schema_file = datasets_root_path_ + "/testBatchDataset/test.data"; 245 bool success = false; 246 auto op1 = TFReader(schema_file); 247 auto op2 = Batch(5, true); 248 auto op3 = Repeat(2); 249 op1->SetTotalRepeats(2); 250 op1->SetNumRepeatsPerEpoch(2); 251 op2->SetTotalRepeats(2); 252 op2->SetNumRepeatsPerEpoch(2); 253 auto tree = Build({op1, op2, op3}); 254 tree->Prepare(); 255 Status rc = tree->Launch(); 256 if (rc.IsError()) { 257 MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << "."; 258 } else { 259 int64_t payload[] = {-9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807, 260 -9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807}; 261 de::DatasetIterator di(tree); 262 std::shared_ptr<de::Tensor> t1, t2; 263 rc = de::Tensor::CreateFromMemory(de::TensorShape({5, 1}), de::DataType(DataType::DE_INT64), 264 (unsigned char *)payload, &t1); 265 EXPECT_TRUE(rc.IsOk()); 266 rc = de::Tensor::CreateFromMemory(de::TensorShape({5, 1}), de::DataType(DataType::DE_INT64), 267 (unsigned char *)(payload + 5), &t2); 268 EXPECT_TRUE(rc.IsOk()); 269 270 TensorMap tensor_map; 271 rc = di.GetNextAsMap(&tensor_map); 272 EXPECT_TRUE(rc.IsOk()); 273 EXPECT_EQ(*t1 == *(tensor_map["col_sint64"]), true); // first call to getNext() 274 275 rc = di.GetNextAsMap(&tensor_map); 276 EXPECT_TRUE(rc.IsOk()); 277 EXPECT_EQ(*t2 == *(tensor_map["col_sint64"]), true); // second call to getNext() 278 279 rc = di.GetNextAsMap(&tensor_map); 280 EXPECT_TRUE(rc.IsOk()); 281 EXPECT_EQ(*t1 == *(tensor_map["col_sint64"]), true); // third call to getNext() 282 283 rc = di.GetNextAsMap(&tensor_map); 284 EXPECT_TRUE(rc.IsOk()); 285 EXPECT_EQ(*t2 == *(tensor_map["col_sint64"]), true); // last call to getNext() 286 287 rc = di.GetNextAsMap(&tensor_map); 288 EXPECT_TRUE(rc.IsOk()); 289 if (tensor_map.size() == 0) { 290 success = true; 291 } 292 } 293 EXPECT_EQ(success, true); 294 } 295 296 TEST_F(MindDataTestBatchOp, TestSimpleBatchPadding) { 297 std::string schema_file = datasets_root_path_ + "/testBatchDataset/test.data"; 298 PadInfo m; 299 std::shared_ptr<Tensor> pad_value; 300 Tensor::CreateEmpty(TensorShape::CreateScalar(), DataType(DataType::DE_FLOAT32), &pad_value); 301 pad_value->SetItemAt<float>({}, -1); 302 m.insert({"col_1d", std::make_pair(TensorShape({4}), pad_value)}); 303 /* 304 std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager(); 305 auto op_connector_size = config_manager->op_connector_size(); 306 auto num_workers = config_manager->num_parallel_workers(); 307 std::vector<std::string> input_columns = {}; 308 std::vector<std::string> output_columns = {}; 309 pybind11::function batch_size_func; 310 pybind11::function batch_map_func; 311 */ 312 int32_t batch_size = 12; 313 bool drop = false; 314 std::shared_ptr<BatchOp> op = Batch(batch_size, drop, m); 315 // std::make_shared<BatchOp>(batch_size, drop, pad, op_connector_size, num_workers, input_columns, output_columns, 316 // batch_size_func, batch_map_func, m); 317 auto tree = Build({TFReader(schema_file), op}); 318 tree->Prepare(); 319 Status rc = tree->Launch(); 320 if (rc.IsError()) { 321 MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << "."; 322 } else { 323 int64_t payload[] = {-9223372036854775807 - 1, 324 1, 325 -1, 326 -1, 327 2, 328 3, 329 -1, 330 -1, 331 4, 332 5, 333 -1, 334 -1, 335 6, 336 7, 337 -1, 338 -1, 339 8, 340 9, 341 -1, 342 -1, 343 10, 344 11, 345 -1, 346 -1, 347 12, 348 13, 349 -1, 350 -1, 351 14, 352 15, 353 -1, 354 -1, 355 16, 356 17, 357 -1, 358 -1, 359 18, 360 19, 361 -1, 362 -1, 363 20, 364 21, 365 -1, 366 -1, 367 22, 368 23, 369 -1, 370 -1}; 371 std::shared_ptr<de::Tensor> t; 372 rc = de::Tensor::CreateFromMemory(de::TensorShape({12, 4}), de::DataType(DataType::DE_INT64), 373 (unsigned char *)payload, &t); 374 de::DatasetIterator di(tree); 375 TensorMap tensor_map; 376 rc = di.GetNextAsMap(&tensor_map); 377 EXPECT_TRUE((*t) == (*(tensor_map["col_1d"]))); 378 rc = di.GetNextAsMap(&tensor_map); 379 EXPECT_TRUE(tensor_map.size() == 0); 380 EXPECT_TRUE(rc.IsOk()); 381 } 382 } 383