1 /** 2 * Copyright 2020-2021 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include <memory> 17 #include <string> 18 #include "common/common.h" 19 #include "minddata/dataset/core/client.h" 20 #include "minddata/dataset/engine/data_schema.h" 21 #include "minddata/dataset/engine/datasetops/source/album_op.h" 22 #include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" 23 24 #include "minddata/dataset/util/status.h" 25 #include "gtest/gtest.h" 26 #include "utils/log_adapter.h" 27 #include "securec.h" 28 #include "minddata/dataset/include/dataset/datasets.h" 29 #include "minddata/dataset/include/dataset/transforms.h" 30 31 using namespace mindspore::dataset; 32 using mindspore::LogStream; 33 using mindspore::ExceptionType::NoExceptionType; 34 using mindspore::MsLogLevel::ERROR; 35 36 // std::shared_ptr<RepeatOp> Repeat(int repeat_cnt); 37 38 // std::shared_ptr<ExecutionTree> Build(std::vector<std::shared_ptr<DatasetOp>> ops); 39 40 std::shared_ptr<AlbumOp> AlbumSchema(int64_t num_works, int64_t conns, std::string path, std::string schema_file, 41 std::vector<std::string> column_names = {}, bool shuf = false, 42 std::shared_ptr<SamplerRT> sampler = nullptr, bool decode = false) { 43 auto schema = std::make_unique<DataSchema>(); 44 // AlbumOp constructor for reference 45 // AlbumOp(int32_t num_wkrs, int32_t rows_per_buffer, std::string file_dir, int32_t queue_size, bool do_decode, 46 // const std::set<std::string> &exts, std::unique_ptr<DataSchema> data_schema, 47 // std::shared_ptr<SamplerRT> sampler) 48 49 // default schema construction: 50 51 (void)schema->LoadSchemaFile(schema_file, column_names); 52 std::set<std::string> ext = {".json"}; 53 if (sampler == nullptr) { 54 const int64_t num_samples = 0; // default num samples of 0 means to sample entire set of data 55 const int64_t start_index = 0; 56 sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples); 57 } 58 std::shared_ptr<AlbumOp> so = 59 std::make_shared<AlbumOp>(num_works, path, conns, decode, ext, std::move(schema), std::move(sampler)); 60 return so; 61 } 62 63 class MindDataTestAlbum : public UT::DatasetOpTesting { 64 protected: 65 }; 66 67 TEST_F(MindDataTestAlbum, TestSequentialAlbumWithSchema) { 68 std::string folder_path = datasets_root_path_ + "/testAlbum/images"; 69 std::string schema_file = datasets_root_path_ + "/testAlbum/datasetSchema.json"; 70 std::vector<std::string> column_names = {"image", "label", "id"}; 71 auto op1 = AlbumSchema(16, 32, folder_path, schema_file, column_names, false); 72 std::shared_ptr<RepeatOp> op2 = Repeat(2); 73 op1->SetTotalRepeats(2); 74 op1->SetNumRepeatsPerEpoch(2); 75 std::shared_ptr<ExecutionTree> tree = Build({op1, op2}); 76 ASSERT_OK(tree->Prepare()); 77 ASSERT_OK(tree->Launch()); 78 DatasetIterator di(tree); 79 TensorMap tensor_map; 80 ASSERT_OK(di.GetNextAsMap(&tensor_map)); 81 uint64_t i = 0; 82 std::string_view label = 0; 83 while (tensor_map.size() != 0) { 84 EXPECT_TRUE(tensor_map["label"]->GetItemAt(&label, {0})); 85 MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "label shape" 86 << tensor_map["label"] << "\n"; 87 i++; 88 ASSERT_OK(di.GetNextAsMap(&tensor_map)); 89 } 90 MS_LOG(INFO) << "got rows: " << i << "\n"; 91 EXPECT_TRUE(i == 14); 92 } 93 94 TEST_F(MindDataTestAlbum, TestSequentialAlbumWithSchemaNoOrder) { 95 std::string folder_path = datasets_root_path_ + "/testAlbum/images"; 96 std::string schema_file = datasets_root_path_ + "/testAlbum/datasetSchema.json"; 97 auto op1 = AlbumSchema(16, 32, folder_path, schema_file); 98 std::shared_ptr<RepeatOp> op2 = Repeat(2); 99 op1->SetTotalRepeats(2); 100 op1->SetNumRepeatsPerEpoch(2); 101 std::shared_ptr<ExecutionTree> tree = Build({op1, op2}); 102 ASSERT_OK(tree->Prepare()); 103 ASSERT_OK(tree->Launch()); 104 DatasetIterator di(tree); 105 TensorMap tensor_map; 106 ASSERT_OK(di.GetNextAsMap(&tensor_map)); 107 uint64_t i = 0; 108 std::string_view label; 109 while (tensor_map.size() != 0) { 110 EXPECT_OK(tensor_map["label"]->GetItemAt(&label, {0})); 111 MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "label shape" 112 << tensor_map["label"] << "\n"; 113 i++; 114 ASSERT_OK(di.GetNextAsMap(&tensor_map)); 115 } 116 MS_LOG(INFO) << "got rows: " << i << "\n"; 117 EXPECT_TRUE(i == 14); 118 } 119 120 TEST_F(MindDataTestAlbum, TestSequentialAlbumWithSchemaFloat) { 121 std::string folder_path = datasets_root_path_ + "/testAlbum/images"; 122 // add the priority column 123 std::string schema_file = datasets_root_path_ + "/testAlbum/floatSchema.json"; 124 auto op1 = AlbumSchema(16, 32, folder_path, schema_file); 125 std::shared_ptr<RepeatOp> op2 = Repeat(2); 126 op1->SetTotalRepeats(2); 127 op1->SetNumRepeatsPerEpoch(2); 128 std::shared_ptr<ExecutionTree> tree = Build({op1, op2}); 129 tree->Prepare(); 130 ASSERT_OK(tree->Launch()); 131 DatasetIterator di(tree); 132 TensorMap tensor_map; 133 ASSERT_OK(di.GetNextAsMap(&tensor_map)); 134 uint64_t i = 0; 135 std::string_view label; 136 double priority = 0; 137 while (tensor_map.size() != 0) { 138 EXPECT_OK(tensor_map["label"]->GetItemAt(&label, {0})); 139 EXPECT_OK(tensor_map["_priority"]->GetItemAt<double>(&priority, {0})); 140 MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "label shape" 141 << tensor_map["label"] << "priority: " << priority << "\n"; 142 i++; 143 ASSERT_OK(di.GetNextAsMap(&tensor_map)); 144 } 145 MS_LOG(INFO) << "got rows: " << i << "\n"; 146 EXPECT_TRUE(i == 14); 147 } 148 149 TEST_F(MindDataTestAlbum, TestSequentialAlbumWithFullSchema) { 150 std::string folder_path = datasets_root_path_ + "/testAlbum/images"; 151 // add the priority column 152 std::string schema_file = datasets_root_path_ + "/testAlbum/fullSchema.json"; 153 auto op1 = AlbumSchema(16, 32, folder_path, schema_file); 154 std::shared_ptr<RepeatOp> op2 = Repeat(2); 155 op1->SetTotalRepeats(2); 156 op1->SetNumRepeatsPerEpoch(2); 157 std::shared_ptr<ExecutionTree> tree = Build({op1, op2}); 158 ASSERT_OK(tree->Prepare()); 159 ASSERT_OK(tree->Launch()); 160 DatasetIterator di(tree); 161 TensorMap tensor_map; 162 ASSERT_OK(di.GetNextAsMap(&tensor_map)); 163 uint64_t i = 0; 164 std::string_view label = 0; 165 double priority = 0; 166 int64_t id = 0; 167 while (tensor_map.size() != 0) { 168 EXPECT_OK(tensor_map["label"]->GetItemAt(&label, {0})); 169 EXPECT_OK(tensor_map["_priority"]->GetItemAt<double>(&priority, {0})); 170 EXPECT_OK(tensor_map["id"]->GetItemAt<int64_t>(&id, {})); 171 MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "label shape" 172 << tensor_map["label"] << "priority: " << priority 173 << " embedding : " << tensor_map["_embedding"]->shape() << " id: " << id << "\n"; 174 i++; 175 ASSERT_OK(di.GetNextAsMap(&tensor_map)); 176 } 177 MS_LOG(INFO) << "got rows: " << i << "\n"; 178 EXPECT_TRUE(i == 14); 179 } 180