• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020-2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include <memory>
17 #include <string>
18 #include "common/common.h"
19 #include "minddata/dataset/core/client.h"
20 #include "minddata/dataset/engine/data_schema.h"
21 #include "minddata/dataset/engine/datasetops/source/album_op.h"
22 #include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h"
23 
24 #include "minddata/dataset/util/status.h"
25 #include "gtest/gtest.h"
26 #include "utils/log_adapter.h"
27 #include "securec.h"
28 #include "minddata/dataset/include/dataset/datasets.h"
29 #include "minddata/dataset/include/dataset/transforms.h"
30 
31 using namespace mindspore::dataset;
32 using mindspore::LogStream;
33 using mindspore::ExceptionType::NoExceptionType;
34 using mindspore::MsLogLevel::ERROR;
35 
36 // std::shared_ptr<RepeatOp> Repeat(int repeat_cnt);
37 
38 // std::shared_ptr<ExecutionTree> Build(std::vector<std::shared_ptr<DatasetOp>> ops);
39 
AlbumSchema(int64_t num_works,int64_t conns,std::string path,std::string schema_file,std::vector<std::string> column_names={},bool shuf=false,std::shared_ptr<SamplerRT> sampler=nullptr,bool decode=false)40 std::shared_ptr<AlbumOp> AlbumSchema(int64_t num_works, int64_t conns, std::string path, std::string schema_file,
41                                      std::vector<std::string> column_names = {}, bool shuf = false,
42                                      std::shared_ptr<SamplerRT> sampler = nullptr, bool decode = false) {
43   auto schema = std::make_unique<DataSchema>();
44   // AlbumOp constructor for reference
45   // AlbumOp(int32_t num_wkrs, int32_t rows_per_buffer, std::string file_dir, int32_t queue_size, bool do_decode,
46   //         const std::set<std::string> &exts, std::unique_ptr<DataSchema> data_schema,
47   //         std::shared_ptr<SamplerRT> sampler)
48 
49   // default schema construction:
50 
51   (void)schema->LoadSchemaFile(schema_file, column_names);
52   std::set<std::string> ext = {".json"};
53   if (sampler == nullptr) {
54     const int64_t num_samples = 0;  // default num samples of 0 means to sample entire set of data
55     const int64_t start_index = 0;
56     sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples);
57   }
58   std::shared_ptr<AlbumOp> so =
59     std::make_shared<AlbumOp>(num_works, path, conns, decode, ext, std::move(schema), std::move(sampler));
60   return so;
61 }
62 
63 class MindDataTestAlbum : public UT::DatasetOpTesting {
64  protected:
65 };
66 
// Runs AlbumOp -> RepeatOp(2) over testAlbum/images with datasetSchema.json and an explicit column list
// ("image", "label", "id"), reads the "label" column of every row and expects 14 rows in total.
TEST_F(MindDataTestAlbum, TestSequentialAlbumWithSchema) {
  std::string folder_path = datasets_root_path_ + "/testAlbum/images";
  std::string schema_file = datasets_root_path_ + "/testAlbum/datasetSchema.json";
  std::vector<std::string> column_names = {"image", "label", "id"};
  auto op1 = AlbumSchema(16, 32, folder_path, schema_file, column_names, false);
  std::shared_ptr<RepeatOp> op2 = Repeat(2);
  // Keep the leaf op's repeat bookkeeping in line with the RepeatOp(2) placed above it.
  op1->SetTotalRepeats(2);
  op1->SetNumRepeatsPerEpoch(2);
  std::shared_ptr<ExecutionTree> tree = Build({op1, op2});
  ASSERT_OK(tree->Prepare());
  ASSERT_OK(tree->Launch());
  DatasetIterator di(tree);
  TensorMap tensor_map;
  ASSERT_OK(di.GetNextAsMap(&tensor_map));
  uint64_t i = 0;
  // Default-construct the view: initialising a std::string_view from 0 selects the const char* constructor
  // with a null pointer, which is undefined behaviour.
  std::string_view label;
  // An empty map is the end-of-data signal used by this loop.
  while (tensor_map.size() != 0) {
    // EXPECT_OK (as in the sibling tests) reports the failing Status instead of a bare boolean.
    EXPECT_OK(tensor_map["label"]->GetItemAt(&label, {0}));
    MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "label shape"
                  << tensor_map["label"] << "\n";
    i++;
    ASSERT_OK(di.GetNextAsMap(&tensor_map));
  }
  MS_LOG(INFO) << "got rows: " << i << "\n";
  EXPECT_TRUE(i == 14);
}
93 
// Same pipeline as the explicit-column test, but no column list is passed to AlbumSchema (it defaults to
// empty). Reads the "label" column of each row and expects 14 rows in total.
TEST_F(MindDataTestAlbum, TestSequentialAlbumWithSchemaNoOrder) {
  const std::string album_dir = datasets_root_path_ + "/testAlbum/images";
  const std::string schema_path = datasets_root_path_ + "/testAlbum/datasetSchema.json";

  // Leaf op followed by a two-fold repeat; the leaf is told about the repeat count as well.
  auto album_op = AlbumSchema(16, 32, album_dir, schema_path);
  std::shared_ptr<RepeatOp> repeat_op = Repeat(2);
  album_op->SetTotalRepeats(2);
  album_op->SetNumRepeatsPerEpoch(2);

  std::shared_ptr<ExecutionTree> exec_tree = Build({album_op, repeat_op});
  ASSERT_OK(exec_tree->Prepare());
  ASSERT_OK(exec_tree->Launch());

  DatasetIterator row_iter(exec_tree);
  TensorMap row;
  ASSERT_OK(row_iter.GetNextAsMap(&row));

  std::string_view label;
  uint64_t row_count = 0;
  // Iterate until the iterator hands back an empty map.
  for (; !row.empty(); ++row_count) {
    EXPECT_OK(row["label"]->GetItemAt(&label, {0}));
    MS_LOG(DEBUG) << "row: " << row_count << "\t" << row["image"]->shape() << "label:" << label << "label shape"
                  << row["label"] << "\n";
    ASSERT_OK(row_iter.GetNextAsMap(&row));
  }
  MS_LOG(INFO) << "got rows: " << row_count << "\n";
  EXPECT_TRUE(row_count == 14);
}
119 
// Runs AlbumOp -> RepeatOp(2) over testAlbum/images with floatSchema.json, reads the "label" and the
// double-valued "_priority" columns of every row and expects 14 rows in total.
TEST_F(MindDataTestAlbum, TestSequentialAlbumWithSchemaFloat) {
  std::string folder_path = datasets_root_path_ + "/testAlbum/images";
  // add the priority column
  std::string schema_file = datasets_root_path_ + "/testAlbum/floatSchema.json";
  auto op1 = AlbumSchema(16, 32, folder_path, schema_file);
  std::shared_ptr<RepeatOp> op2 = Repeat(2);
  // Keep the leaf op's repeat bookkeeping in line with the RepeatOp(2) placed above it.
  op1->SetTotalRepeats(2);
  op1->SetNumRepeatsPerEpoch(2);
  std::shared_ptr<ExecutionTree> tree = Build({op1, op2});
  // Check the Prepare() status like the sibling tests do, so a failure here stops the test
  // instead of surfacing later as a confusing Launch or iterator error.
  ASSERT_OK(tree->Prepare());
  ASSERT_OK(tree->Launch());
  DatasetIterator di(tree);
  TensorMap tensor_map;
  ASSERT_OK(di.GetNextAsMap(&tensor_map));
  uint64_t i = 0;
  std::string_view label;
  double priority = 0;
  // An empty map is the end-of-data signal used by this loop.
  while (tensor_map.size() != 0) {
    EXPECT_OK(tensor_map["label"]->GetItemAt(&label, {0}));
    EXPECT_OK(tensor_map["_priority"]->GetItemAt<double>(&priority, {0}));
    MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "label shape"
                  << tensor_map["label"] << "priority: " << priority << "\n";
    i++;
    ASSERT_OK(di.GetNextAsMap(&tensor_map));
  }
  MS_LOG(INFO) << "got rows: " << i << "\n";
  EXPECT_TRUE(i == 14);
}
148 
// Runs AlbumOp -> RepeatOp(2) over testAlbum/images with fullSchema.json, reads the "label", "_priority"
// (double) and "id" (int64) columns of every row, logs the "_embedding" shape and expects 14 rows in total.
TEST_F(MindDataTestAlbum, TestSequentialAlbumWithFullSchema) {
  std::string folder_path = datasets_root_path_ + "/testAlbum/images";
  // add the priority column
  std::string schema_file = datasets_root_path_ + "/testAlbum/fullSchema.json";
  auto op1 = AlbumSchema(16, 32, folder_path, schema_file);
  std::shared_ptr<RepeatOp> op2 = Repeat(2);
  // Keep the leaf op's repeat bookkeeping in line with the RepeatOp(2) placed above it.
  op1->SetTotalRepeats(2);
  op1->SetNumRepeatsPerEpoch(2);
  std::shared_ptr<ExecutionTree> tree = Build({op1, op2});
  ASSERT_OK(tree->Prepare());
  ASSERT_OK(tree->Launch());
  DatasetIterator di(tree);
  TensorMap tensor_map;
  ASSERT_OK(di.GetNextAsMap(&tensor_map));
  uint64_t i = 0;
  // Default-construct the view: initialising a std::string_view from 0 selects the const char* constructor
  // with a null pointer, which is undefined behaviour.
  std::string_view label;
  double priority = 0;
  int64_t id = 0;
  // An empty map is the end-of-data signal used by this loop.
  while (tensor_map.size() != 0) {
    EXPECT_OK(tensor_map["label"]->GetItemAt(&label, {0}));
    EXPECT_OK(tensor_map["_priority"]->GetItemAt<double>(&priority, {0}));
    // "id" is read with an empty index list, unlike the other columns which are indexed with {0}.
    EXPECT_OK(tensor_map["id"]->GetItemAt<int64_t>(&id, {}));
    MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "label shape"
                  << tensor_map["label"] << "priority: " << priority
                  << " embedding : " << tensor_map["_embedding"]->shape() << " id: " << id << "\n";
    i++;
    ASSERT_OK(di.GetNextAsMap(&tensor_map));
  }
  MS_LOG(INFO) << "got rows: " << i << "\n";
  EXPECT_TRUE(i == 14);
}
180