• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include <iostream>
17 #include <memory>
18 #include <vector>
19 
20 #include "minddata/dataset/core/client.h"
21 #include "common/common.h"
22 #include "utils/ms_utils.h"
23 #include "gtest/gtest.h"
24 #include "utils/log_adapter.h"
25 #include "minddata/dataset/engine/data_schema.h"
26 #include "minddata/dataset/engine/datasetops/source/text_file_op.h"
27 #include "minddata/dataset/util/status.h"
28 
29 namespace common = mindspore::common;
30 
31 using namespace mindspore::dataset;
32 using mindspore::LogStream;
33 using mindspore::ExceptionType::NoExceptionType;
34 using mindspore::MsLogLevel::INFO;
35 
36 class MindDataTestTextFileOp : public UT::DatasetOpTesting {};
37 
38 TEST_F(MindDataTestTextFileOp, TestTextFileBasic) {
39   // Start with an empty execution tree
40   auto tree = std::make_shared<ExecutionTree>();
41   Status rc;
42   std::string dataset_path;
43   dataset_path = datasets_root_path_ + "/testTextFileDataset/1.txt";
44   std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
45   int32_t num_workers = 1;  // Only one file
46   int32_t op_connector_size = 2;
47   int32_t worker_connector_size = config_manager->worker_connector_size();
48   int64_t total_rows = 0;  // read all rows
49   std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
50   rc = schema->AddColumn(ColDescriptor("text", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1));
51   ASSERT_OK(rc);
52   std::vector<std::string> files = {dataset_path};
53   bool shuffle_files = false;
54   int32_t num_devices = 1;
55   int32_t device_id = 0;
56 
57   std::shared_ptr<TextFileOp> op =
58     std::make_shared<TextFileOp>(num_workers, total_rows, worker_connector_size, std::move(schema), files,
59                                  op_connector_size, shuffle_files, num_devices, device_id);
60   rc = op->Init();
61   ASSERT_OK(rc);
62 
63   rc = tree->AssociateNode(op);
64   ASSERT_OK(rc);
65 
66   rc = tree->AssignRoot(op);
67   ASSERT_OK(rc);
68 
69   MS_LOG(INFO) << "Launching tree and begin iteration.";
70   rc = tree->Prepare();
71   ASSERT_OK(rc);
72 
73   rc = tree->Launch();
74   ASSERT_OK(rc);
75 
76   // Start the loop of reading tensors from our pipeline
77   DatasetIterator di(tree);
78   TensorRow tensor_list;
79   rc = di.FetchNextTensorRow(&tensor_list);
80   ASSERT_OK(rc);
81 
82   int row_count = 0;
83   while (!tensor_list.empty()) {
84     // Display the tensor by calling the printer on it
85     for (int i = 0; i < tensor_list.size(); i++) {
86       std::ostringstream ss;
87       ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
88       MS_LOG(INFO) << "Tensor print: " << ss.str() << ".";
89     }
90 
91     rc = di.FetchNextTensorRow(&tensor_list);
92     ASSERT_OK(rc);
93     row_count++;
94   }
95 
96   ASSERT_EQ(row_count, 3);
97 }
98 
99 TEST_F(MindDataTestTextFileOp, TestTotalRows) {
100   std::string tf_file1 = datasets_root_path_ + "/testTextFileDataset/1.txt";
101   std::string tf_file2 = datasets_root_path_ + "/testTextFileDataset/2.txt";
102   std::vector<std::string> files;
103   files.push_back(tf_file1);
104   int64_t total_rows = 0;
105   TextFileOp::CountAllFileRows(files, &total_rows);
106   ASSERT_EQ(total_rows, 3);
107   files.clear();
108 
109   files.push_back(tf_file2);
110   TextFileOp::CountAllFileRows(files, &total_rows);
111   ASSERT_EQ(total_rows, 2);
112   files.clear();
113 
114   files.push_back(tf_file1);
115   files.push_back(tf_file2);
116   TextFileOp::CountAllFileRows(files, &total_rows);
117   ASSERT_EQ(total_rows, 5);
118   files.clear();
119 }
120 
121 TEST_F(MindDataTestTextFileOp, TestTotalRowsFileNotExist) {
122   std::string tf_file1 = datasets_root_path_ + "/does/not/exist/0.txt";
123   std::vector<std::string> files;
124   files.push_back(tf_file1);
125   int64_t total_rows = 0;
126   TextFileOp::CountAllFileRows(files, &total_rows);
127   ASSERT_EQ(total_rows, 0);
128 }
129