1 /**
2 * Copyright 2020 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include <iostream>
17 #include <memory>
18 #include <vector>
19
20 #include "minddata/dataset/core/client.h"
21 #include "common/common.h"
22 #include "utils/ms_utils.h"
23 #include "gtest/gtest.h"
24 #include "utils/log_adapter.h"
25 #include "minddata/dataset/engine/data_schema.h"
26 #include "minddata/dataset/engine/datasetops/source/text_file_op.h"
27 #include "minddata/dataset/util/status.h"
28
29 namespace common = mindspore::common;
30
31 using namespace mindspore::dataset;
32 using mindspore::LogStream;
33 using mindspore::ExceptionType::NoExceptionType;
34 using mindspore::MsLogLevel::INFO;
35
36 class MindDataTestTextFileOp : public UT::DatasetOpTesting {};
37
// Builds an execution tree whose only node is a TextFileOp reading
// testTextFileDataset/1.txt, launches it, drains the pipeline through a
// DatasetIterator and checks that exactly 3 rows were produced.
TEST_F(MindDataTestTextFileOp, TestTextFileBasic) {
  // Start with an empty execution tree
  auto tree = std::make_shared<ExecutionTree>();

  const std::string dataset_path = datasets_root_path_ + "/testTextFileDataset/1.txt";
  std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
  int32_t num_workers = 1;  // Only one file
  int32_t op_connector_size = 2;
  int32_t worker_connector_size = config_manager->worker_connector_size();
  int64_t total_rows = 0;  // read all rows

  // Single-column schema: a rank-1 uint8 "text" column.
  std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
  Status rc = schema->AddColumn(ColDescriptor("text", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1));
  ASSERT_OK(rc);

  std::vector<std::string> files = {dataset_path};
  bool shuffle_files = false;
  int32_t num_devices = 1;
  int32_t device_id = 0;

  std::shared_ptr<TextFileOp> op =
    std::make_shared<TextFileOp>(num_workers, total_rows, worker_connector_size, std::move(schema), files,
                                 op_connector_size, shuffle_files, num_devices, device_id);
  rc = op->Init();
  ASSERT_OK(rc);

  // The op is both the only node and the root of the tree.
  rc = tree->AssociateNode(op);
  ASSERT_OK(rc);

  rc = tree->AssignRoot(op);
  ASSERT_OK(rc);

  MS_LOG(INFO) << "Launching tree and begin iteration.";
  rc = tree->Prepare();
  ASSERT_OK(rc);

  rc = tree->Launch();
  ASSERT_OK(rc);

  // Start the loop of reading tensors from our pipeline
  DatasetIterator di(tree);
  TensorRow tensor_list;
  rc = di.FetchNextTensorRow(&tensor_list);
  ASSERT_OK(rc);

  // An empty row is treated as the end of the data.
  int row_count = 0;
  while (!tensor_list.empty()) {
    // Display the tensor by calling the printer on it.
    // The index is unsigned so the comparison against size() does not mix
    // signed and unsigned operands.
    for (size_t i = 0; i < tensor_list.size(); ++i) {
      std::ostringstream ss;
      ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << "\n";
      MS_LOG(INFO) << "Tensor print: " << ss.str() << ".";
    }

    rc = di.FetchNextTensorRow(&tensor_list);
    ASSERT_OK(rc);
    row_count++;
  }

  ASSERT_EQ(row_count, 3);
}
98
// Checks TextFileOp::CountAllFileRows for each test file on its own and for
// both files together: 1.txt -> 3 rows, 2.txt -> 2 rows, both -> 5 rows.
TEST_F(MindDataTestTextFileOp, TestTotalRows) {
  const std::string tf_file1 = datasets_root_path_ + "/testTextFileDataset/1.txt";
  const std::string tf_file2 = datasets_root_path_ + "/testTextFileDataset/2.txt";

  // total_rows is reused across the calls; the expected values below (3, then 2,
  // then 5) imply that each call overwrites it rather than adding to it.
  int64_t total_rows = 0;

  // NOTE(review): assumes CountAllFileRows returns a Status (confirm against
  // text_file_op.h). The result used to be dropped; checking it makes a failed
  // count show up as a failed call instead of only as a wrong number.
  std::vector<std::string> files = {tf_file1};
  Status rc = TextFileOp::CountAllFileRows(files, &total_rows);
  ASSERT_OK(rc);
  ASSERT_EQ(total_rows, 3);

  files = {tf_file2};
  rc = TextFileOp::CountAllFileRows(files, &total_rows);
  ASSERT_OK(rc);
  ASSERT_EQ(total_rows, 2);

  files = {tf_file1, tf_file2};
  rc = TextFileOp::CountAllFileRows(files, &total_rows);
  ASSERT_OK(rc);
  ASSERT_EQ(total_rows, 5);
}
120
// Asks TextFileOp::CountAllFileRows to count rows for a path under
// /does/not/exist and expects the reported total to stay at 0.
TEST_F(MindDataTestTextFileOp, TestTotalRowsFileNotExist) {
  const std::string missing_path = datasets_root_path_ + "/does/not/exist/0.txt";
  std::vector<std::string> missing_files = {missing_path};

  int64_t counted_rows = 0;
  // Only the counter is inspected here; the call's own result is not checked.
  TextFileOp::CountAllFileRows(missing_files, &counted_rows);

  ASSERT_EQ(counted_rows, 0);
}
129