1 /** 2 * Copyright 2020 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include <iostream> 17 #include <memory> 18 #include <vector> 19 20 #include "minddata/dataset/core/client.h" 21 #include "common/common.h" 22 #include "utils/ms_utils.h" 23 #include "gtest/gtest.h" 24 #include "utils/log_adapter.h" 25 #include "minddata/dataset/engine/data_schema.h" 26 #include "minddata/dataset/engine/datasetops/source/text_file_op.h" 27 #include "minddata/dataset/util/status.h" 28 29 namespace common = mindspore::common; 30 31 using namespace mindspore::dataset; 32 using mindspore::LogStream; 33 using mindspore::ExceptionType::NoExceptionType; 34 using mindspore::MsLogLevel::INFO; 35 36 class MindDataTestTextFileOp : public UT::DatasetOpTesting {}; 37 38 TEST_F(MindDataTestTextFileOp, TestTextFileBasic) { 39 // Start with an empty execution tree 40 auto tree = std::make_shared<ExecutionTree>(); 41 Status rc; 42 std::string dataset_path; 43 dataset_path = datasets_root_path_ + "/testTextFileDataset/1.txt"; 44 std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager(); 45 int32_t num_workers = 1; // Only one file 46 int32_t op_connector_size = 2; 47 int32_t worker_connector_size = config_manager->worker_connector_size(); 48 int64_t total_rows = 0; // read all rows 49 std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>(); 50 rc = schema->AddColumn(ColDescriptor("text", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1)); 51 ASSERT_OK(rc); 52 std::vector<std::string> files = {dataset_path}; 53 bool shuffle_files = false; 54 int32_t num_devices = 1; 55 int32_t device_id = 0; 56 57 std::shared_ptr<TextFileOp> op = 58 std::make_shared<TextFileOp>(num_workers, total_rows, worker_connector_size, std::move(schema), files, 59 op_connector_size, shuffle_files, num_devices, device_id); 60 rc = op->Init(); 61 ASSERT_OK(rc); 62 63 rc = tree->AssociateNode(op); 64 ASSERT_OK(rc); 65 66 rc = tree->AssignRoot(op); 67 ASSERT_OK(rc); 68 69 MS_LOG(INFO) << "Launching tree and begin iteration."; 70 rc = tree->Prepare(); 71 ASSERT_OK(rc); 72 73 rc = tree->Launch(); 74 ASSERT_OK(rc); 75 76 // Start the loop of reading tensors from our pipeline 77 DatasetIterator di(tree); 78 TensorRow tensor_list; 79 rc = di.FetchNextTensorRow(&tensor_list); 80 ASSERT_OK(rc); 81 82 int row_count = 0; 83 while (!tensor_list.empty()) { 84 // Display the tensor by calling the printer on it 85 for (int i = 0; i < tensor_list.size(); i++) { 86 std::ostringstream ss; 87 ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl; 88 MS_LOG(INFO) << "Tensor print: " << ss.str() << "."; 89 } 90 91 rc = di.FetchNextTensorRow(&tensor_list); 92 ASSERT_OK(rc); 93 row_count++; 94 } 95 96 ASSERT_EQ(row_count, 3); 97 } 98 99 TEST_F(MindDataTestTextFileOp, TestTotalRows) { 100 std::string tf_file1 = datasets_root_path_ + "/testTextFileDataset/1.txt"; 101 std::string tf_file2 = datasets_root_path_ + "/testTextFileDataset/2.txt"; 102 std::vector<std::string> files; 103 files.push_back(tf_file1); 104 int64_t total_rows = 0; 105 TextFileOp::CountAllFileRows(files, &total_rows); 106 ASSERT_EQ(total_rows, 3); 107 files.clear(); 108 109 files.push_back(tf_file2); 110 TextFileOp::CountAllFileRows(files, &total_rows); 111 ASSERT_EQ(total_rows, 2); 112 files.clear(); 113 114 files.push_back(tf_file1); 115 files.push_back(tf_file2); 116 TextFileOp::CountAllFileRows(files, &total_rows); 117 ASSERT_EQ(total_rows, 5); 118 files.clear(); 119 } 120 121 TEST_F(MindDataTestTextFileOp, TestTotalRowsFileNotExist) { 122 std::string tf_file1 = datasets_root_path_ + "/does/not/exist/0.txt"; 123 std::vector<std::string> files; 124 files.push_back(tf_file1); 125 int64_t total_rows = 0; 126 TextFileOp::CountAllFileRows(files, &total_rows); 127 ASSERT_EQ(total_rows, 0); 128 } 129