1 /**
2 * Copyright 2020-2021 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include <memory>
17 #include <string>
18 #include "common/common.h"
19 #include "minddata/dataset/core/client.h"
20 #include "minddata/dataset/engine/data_schema.h"
21 #include "minddata/dataset/engine/datasetops/source/album_op.h"
22 #include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h"
23
24 #include "minddata/dataset/util/status.h"
25 #include "gtest/gtest.h"
26 #include "utils/log_adapter.h"
27 #include "securec.h"
28 #include "minddata/dataset/include/dataset/datasets.h"
29 #include "minddata/dataset/include/dataset/transforms.h"
30
31 using namespace mindspore::dataset;
32 using mindspore::LogStream;
33 using mindspore::ExceptionType::NoExceptionType;
34 using mindspore::MsLogLevel::ERROR;
35
36 // std::shared_ptr<RepeatOp> Repeat(int repeat_cnt);
37
38 // std::shared_ptr<ExecutionTree> Build(std::vector<std::shared_ptr<DatasetOp>> ops);
39
AlbumSchema(int64_t num_works,int64_t conns,std::string path,std::string schema_file,std::vector<std::string> column_names={},bool shuf=false,std::shared_ptr<SamplerRT> sampler=nullptr,bool decode=false)40 std::shared_ptr<AlbumOp> AlbumSchema(int64_t num_works, int64_t conns, std::string path, std::string schema_file,
41 std::vector<std::string> column_names = {}, bool shuf = false,
42 std::shared_ptr<SamplerRT> sampler = nullptr, bool decode = false) {
43 auto schema = std::make_unique<DataSchema>();
44 // AlbumOp constructor for reference
45 // AlbumOp(int32_t num_wkrs, int32_t rows_per_buffer, std::string file_dir, int32_t queue_size, bool do_decode,
46 // const std::set<std::string> &exts, std::unique_ptr<DataSchema> data_schema,
47 // std::shared_ptr<SamplerRT> sampler)
48
49 // default schema construction:
50
51 (void)schema->LoadSchemaFile(schema_file, column_names);
52 std::set<std::string> ext = {".json"};
53 if (sampler == nullptr) {
54 const int64_t num_samples = 0; // default num samples of 0 means to sample entire set of data
55 const int64_t start_index = 0;
56 sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples);
57 }
58 std::shared_ptr<AlbumOp> so =
59 std::make_shared<AlbumOp>(num_works, path, conns, decode, ext, std::move(schema), std::move(sampler));
60 return so;
61 }
62
63 class MindDataTestAlbum : public UT::DatasetOpTesting {
64 protected:
65 };
66
// Reads the testAlbum dataset with datasetSchema.json, restricted to the columns
// "image", "label" and "id", through AlbumOp -> Repeat(2), and expects 14 rows in total
// (presumably 7 samples x 2 repeats - confirm against the testAlbum fixture).
TEST_F(MindDataTestAlbum, TestSequentialAlbumWithSchema) {
  std::string folder_path = datasets_root_path_ + "/testAlbum/images";
  std::string schema_file = datasets_root_path_ + "/testAlbum/datasetSchema.json";
  std::vector<std::string> column_names = {"image", "label", "id"};
  auto op1 = AlbumSchema(16, 32, folder_path, schema_file, column_names, false);
  std::shared_ptr<RepeatOp> op2 = Repeat(2);
  op1->SetTotalRepeats(2);
  op1->SetNumRepeatsPerEpoch(2);
  std::shared_ptr<ExecutionTree> tree = Build({op1, op2});
  ASSERT_OK(tree->Prepare());
  ASSERT_OK(tree->Launch());

  DatasetIterator di(tree);
  TensorMap tensor_map;
  ASSERT_OK(di.GetNextAsMap(&tensor_map));

  uint64_t i = 0;
  // Default-constructed (empty) view. Initialising a string_view from 0 selects the
  // const char* constructor with a null pointer, which is undefined behaviour.
  std::string_view label;
  // An empty map is what ends the iteration.
  while (tensor_map.size() != 0) {
    // GetItemAt returns a Status; check it the same way as the other tests in this file.
    EXPECT_OK(tensor_map["label"]->GetItemAt(&label, {0}));
    MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "label shape"
                  << tensor_map["label"]->shape() << "\n";
    i++;
    ASSERT_OK(di.GetNextAsMap(&tensor_map));
  }
  MS_LOG(INFO) << "got rows: " << i << "\n";
  // EXPECT_EQ reports the actual row count on failure, unlike EXPECT_TRUE(i == 14).
  EXPECT_EQ(i, 14U);
}
93
// Same pipeline as the schema test, but without an explicit column list: AlbumSchema is
// called with its default (empty) column_names. Expects 14 rows from AlbumOp -> Repeat(2).
TEST_F(MindDataTestAlbum, TestSequentialAlbumWithSchemaNoOrder) {
  std::string folder_path = datasets_root_path_ + "/testAlbum/images";
  std::string schema_file = datasets_root_path_ + "/testAlbum/datasetSchema.json";
  auto op1 = AlbumSchema(16, 32, folder_path, schema_file);
  std::shared_ptr<RepeatOp> op2 = Repeat(2);
  op1->SetTotalRepeats(2);
  op1->SetNumRepeatsPerEpoch(2);
  std::shared_ptr<ExecutionTree> tree = Build({op1, op2});
  ASSERT_OK(tree->Prepare());
  ASSERT_OK(tree->Launch());

  DatasetIterator di(tree);
  TensorMap tensor_map;
  ASSERT_OK(di.GetNextAsMap(&tensor_map));

  uint64_t i = 0;
  std::string_view label;
  // An empty map is what ends the iteration.
  while (tensor_map.size() != 0) {
    EXPECT_OK(tensor_map["label"]->GetItemAt(&label, {0}));
    // Log the label tensor's shape; streaming the shared_ptr itself would only print an address.
    MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "label shape"
                  << tensor_map["label"]->shape() << "\n";
    i++;
    ASSERT_OK(di.GetNextAsMap(&tensor_map));
  }
  MS_LOG(INFO) << "got rows: " << i << "\n";
  // EXPECT_EQ reports the actual row count on failure, unlike EXPECT_TRUE(i == 14).
  EXPECT_EQ(i, 14U);
}
119
// Reads the testAlbum dataset with floatSchema.json, which adds the "_priority" column,
// and checks that it can be read as a double on every row. Expects 14 rows from
// AlbumOp -> Repeat(2).
TEST_F(MindDataTestAlbum, TestSequentialAlbumWithSchemaFloat) {
  std::string folder_path = datasets_root_path_ + "/testAlbum/images";
  // add the priority column
  std::string schema_file = datasets_root_path_ + "/testAlbum/floatSchema.json";
  auto op1 = AlbumSchema(16, 32, folder_path, schema_file);
  std::shared_ptr<RepeatOp> op2 = Repeat(2);
  op1->SetTotalRepeats(2);
  op1->SetNumRepeatsPerEpoch(2);
  std::shared_ptr<ExecutionTree> tree = Build({op1, op2});
  // Prepare() returns a Status; stop here on failure instead of launching an unprepared tree.
  ASSERT_OK(tree->Prepare());
  ASSERT_OK(tree->Launch());

  DatasetIterator di(tree);
  TensorMap tensor_map;
  ASSERT_OK(di.GetNextAsMap(&tensor_map));

  uint64_t i = 0;
  std::string_view label;
  double priority = 0;
  // An empty map is what ends the iteration.
  while (tensor_map.size() != 0) {
    EXPECT_OK(tensor_map["label"]->GetItemAt(&label, {0}));
    EXPECT_OK(tensor_map["_priority"]->GetItemAt<double>(&priority, {0}));
    // Log the label tensor's shape; streaming the shared_ptr itself would only print an address.
    MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "label shape"
                  << tensor_map["label"]->shape() << "priority: " << priority << "\n";
    i++;
    ASSERT_OK(di.GetNextAsMap(&tensor_map));
  }
  MS_LOG(INFO) << "got rows: " << i << "\n";
  // EXPECT_EQ reports the actual row count on failure, unlike EXPECT_TRUE(i == 14).
  EXPECT_EQ(i, 14U);
}
148
// Reads the testAlbum dataset with fullSchema.json and checks that the "label",
// "_priority" (double) and "id" (int64) columns can be read on every row; the
// "_embedding" column's shape is only logged. Expects 14 rows from AlbumOp -> Repeat(2).
TEST_F(MindDataTestAlbum, TestSequentialAlbumWithFullSchema) {
  std::string folder_path = datasets_root_path_ + "/testAlbum/images";
  // add the priority column
  std::string schema_file = datasets_root_path_ + "/testAlbum/fullSchema.json";
  auto op1 = AlbumSchema(16, 32, folder_path, schema_file);
  std::shared_ptr<RepeatOp> op2 = Repeat(2);
  op1->SetTotalRepeats(2);
  op1->SetNumRepeatsPerEpoch(2);
  std::shared_ptr<ExecutionTree> tree = Build({op1, op2});
  ASSERT_OK(tree->Prepare());
  ASSERT_OK(tree->Launch());

  DatasetIterator di(tree);
  TensorMap tensor_map;
  ASSERT_OK(di.GetNextAsMap(&tensor_map));

  uint64_t i = 0;
  // Default-constructed (empty) view. Initialising a string_view from 0 selects the
  // const char* constructor with a null pointer, which is undefined behaviour.
  std::string_view label;
  double priority = 0;
  int64_t id = 0;
  // An empty map is what ends the iteration.
  while (tensor_map.size() != 0) {
    EXPECT_OK(tensor_map["label"]->GetItemAt(&label, {0}));
    EXPECT_OK(tensor_map["_priority"]->GetItemAt<double>(&priority, {0}));
    // "id" is read with an empty index list, unlike the other columns which use {0}.
    EXPECT_OK(tensor_map["id"]->GetItemAt<int64_t>(&id, {}));
    // Log the label tensor's shape; streaming the shared_ptr itself would only print an address.
    MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "label shape"
                  << tensor_map["label"]->shape() << "priority: " << priority
                  << " embedding : " << tensor_map["_embedding"]->shape() << " id: " << id << "\n";
    i++;
    ASSERT_OK(di.GetNextAsMap(&tensor_map));
  }
  MS_LOG(INFO) << "got rows: " << i << "\n";
  // EXPECT_EQ reports the actual row count on failure, unlike EXPECT_TRUE(i == 14).
  EXPECT_EQ(i, 14U);
}
180