1 /** 2 * Copyright 2019-2021 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include <fstream> 17 #include <iostream> 18 #include <memory> 19 #include <string> 20 21 #include "common/common.h" 22 #include "utils/ms_utils.h" 23 #include "minddata/dataset/core/client.h" 24 #include "minddata/dataset/core/global_context.h" 25 #include "minddata/dataset/engine/datasetops/source/manifest_op.h" 26 #include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" 27 #include "minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h" 28 #include "minddata/dataset/util/status.h" 29 #include "gtest/gtest.h" 30 #include "utils/log_adapter.h" 31 #include "securec.h" 32 33 namespace common = mindspore::common; 34 35 using namespace mindspore::dataset; 36 using mindspore::LogStream; 37 using mindspore::ExceptionType::NoExceptionType; 38 using mindspore::MsLogLevel::ERROR; 39 40 std::shared_ptr<ManifestOp> Manifest(int32_t num_works, int32_t rows, int32_t conns, const std::string &file, 41 std::string usage = "train", std::shared_ptr<SamplerRT> sampler = nullptr, 42 std::map<std::string, int32_t> map = {}, bool decode = false) { 43 if (sampler == nullptr) { 44 const int64_t num_samples = 0; 45 const int64_t start_index = 0; 46 sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples); 47 } 48 auto schema = std::make_unique<DataSchema>(); 49 schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1)); 50 schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1)); 51 return std::make_shared<ManifestOp>(num_works, file, conns, decode, map, std::move(schema), std::move(sampler), 52 usage); 53 } 54 55 class MindDataTestManifest : public UT::DatasetOpTesting { 56 protected: 57 }; 58 59 TEST_F(MindDataTestManifest, TestSequentialManifestWithRepeat) { 60 std::string file = datasets_root_path_ + "/testManifestData/cpp.json"; 61 auto op1 = Manifest(16, 2, 32, file); 62 auto op2 = Repeat(2); 63 op1->SetTotalRepeats(2); 64 op1->SetNumRepeatsPerEpoch(2); 65 auto tree = Build({op1, op2}); 66 tree->Prepare(); 67 uint32_t res[] = {0, 1, 0, 1}; 68 Status rc = tree->Launch(); 69 if (rc.IsError()) { 70 MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << "."; 71 EXPECT_TRUE(false); 72 } else { 73 DatasetIterator di(tree); 74 TensorMap tensor_map; 75 ASSERT_OK(di.GetNextAsMap(&tensor_map)); 76 EXPECT_TRUE(rc.IsOk()); 77 uint64_t i = 0; 78 int32_t label = 0; 79 while (tensor_map.size() != 0) { 80 tensor_map["label"]->GetItemAt<int32_t>(&label, {}); 81 EXPECT_TRUE(res[i] == label); 82 MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n"; 83 i++; 84 ASSERT_OK(di.GetNextAsMap(&tensor_map)); 85 } 86 EXPECT_TRUE(i == 4); 87 } 88 } 89 90 TEST_F(MindDataTestManifest, TestSubsetRandomSamplerManifest) { 91 std::vector<int64_t> indices({1}); 92 int64_t num_samples = 0; 93 std::shared_ptr<SamplerRT> sampler = std::make_shared<SubsetRandomSamplerRT>(indices, num_samples); 94 std::string file = datasets_root_path_ + "/testManifestData/cpp.json"; 95 // Expect 6 samples for label 0 and 1 96 auto tree = Build({Manifest(16, 2, 32, file, "train", std::move(sampler))}); 97 tree->Prepare(); 98 Status rc = tree->Launch(); 99 if (rc.IsError()) { 100 MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << "."; 101 EXPECT_TRUE(false); 102 } else { 103 DatasetIterator di(tree); 104 TensorMap tensor_map; 105 rc = di.GetNextAsMap(&tensor_map); 106 EXPECT_TRUE(rc.IsOk()); 107 uint64_t i = 0; 108 int32_t label = 0; 109 while (tensor_map.size() != 0) { 110 tensor_map["label"]->GetItemAt<int32_t>(&label, {}); 111 i++; 112 ASSERT_OK(di.GetNextAsMap(&tensor_map)); 113 EXPECT_EQ(label, 1); 114 } 115 EXPECT_TRUE(i == 1); 116 } 117 } 118 119 TEST_F(MindDataTestManifest, MindDataTestManifestClassIndex) { 120 std::string file = datasets_root_path_ + "/testManifestData/cpp.json"; 121 std::map<std::string, int32_t> map; 122 map["cat"] = 111; // forward slash is not good, but we need to add this somewhere, also in windows, its a '\' 123 map["dog"] = 222; // forward slash is not good, but we need to add this somewhere, also in windows, its a '\' 124 map["wrong folder name"] = 1234; // this is skipped 125 auto tree = Build({Manifest(16, 2, 32, file, "train", nullptr, map)}); 126 uint64_t res[2] = {111, 222}; 127 tree->Prepare(); 128 Status rc = tree->Launch(); 129 if (rc.IsError()) { 130 MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << "."; 131 EXPECT_TRUE(false); 132 } else { 133 DatasetIterator di(tree); 134 TensorMap tensor_map; 135 ASSERT_OK(di.GetNextAsMap(&tensor_map)); 136 EXPECT_TRUE(rc.IsOk()); 137 uint64_t i = 0; 138 int32_t label = 0; 139 while (tensor_map.size() != 0) { 140 tensor_map["label"]->GetItemAt<int32_t>(&label, {}); 141 EXPECT_TRUE(label == res[i]); 142 MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n"; 143 i++; 144 ASSERT_OK(di.GetNextAsMap(&tensor_map)); 145 } 146 EXPECT_TRUE(i == 2); 147 } 148 } 149 150 TEST_F(MindDataTestManifest, MindDataTestManifestNumSamples) { 151 std::string file = datasets_root_path_ + "/testManifestData/cpp.json"; 152 int64_t num_samples = 1; 153 int64_t start_index = 0; 154 auto seq_sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples); 155 auto op1 = Manifest(16, 2, 32, file, "train", std::move(seq_sampler), {}); 156 auto op2 = Repeat(4); 157 op1->SetTotalRepeats(4); 158 op1->SetNumRepeatsPerEpoch(4); 159 auto tree = Build({op1, op2}); 160 tree->Prepare(); 161 Status rc = tree->Launch(); 162 if (rc.IsError()) { 163 MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << "."; 164 EXPECT_TRUE(false); 165 } else { 166 DatasetIterator di(tree); 167 TensorMap tensor_map; 168 ASSERT_OK(di.GetNextAsMap(&tensor_map)); 169 EXPECT_TRUE(rc.IsOk()); 170 uint64_t i = 0; 171 int32_t label = 0; 172 while (tensor_map.size() != 0) { 173 tensor_map["label"]->GetItemAt<int32_t>(&label, {}); 174 EXPECT_TRUE(0 == label); 175 MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n"; 176 i++; 177 ASSERT_OK(di.GetNextAsMap(&tensor_map)); 178 } 179 EXPECT_TRUE(i == 4); 180 } 181 } 182 183 TEST_F(MindDataTestManifest, MindDataTestManifestEval) { 184 std::string file = datasets_root_path_ + "/testManifestData/cpp.json"; 185 int64_t num_samples = 1; 186 int64_t start_index = 0; 187 auto seq_sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples); 188 auto tree = Build({Manifest(16, 2, 32, file, "eval", std::move(seq_sampler), {})}); 189 tree->Prepare(); 190 Status rc = tree->Launch(); 191 if (rc.IsError()) { 192 MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << "."; 193 EXPECT_TRUE(false); 194 } else { 195 DatasetIterator di(tree); 196 TensorMap tensor_map; 197 ASSERT_OK(di.GetNextAsMap(&tensor_map)); 198 EXPECT_TRUE(rc.IsOk()); 199 uint64_t i = 0; 200 int32_t label = 0; 201 while (tensor_map.size() != 0) { 202 tensor_map["label"]->GetItemAt<int32_t>(&label, {}); 203 EXPECT_TRUE(0 == label); 204 MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n"; 205 i++; 206 ASSERT_OK(di.GetNextAsMap(&tensor_map)); 207 } 208 EXPECT_TRUE(i == 1); 209 } 210 } 211