• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2019-2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include <fstream>
17 #include <iostream>
18 #include <memory>
19 #include <string>
20 
21 #include "common/common.h"
22 #include "utils/ms_utils.h"
23 #include "minddata/dataset/core/client.h"
24 #include "minddata/dataset/core/global_context.h"
25 #include "minddata/dataset/engine/datasetops/source/manifest_op.h"
26 #include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h"
27 #include "minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h"
28 #include "minddata/dataset/util/status.h"
29 #include "gtest/gtest.h"
30 #include "utils/log_adapter.h"
31 #include "securec.h"
32 
33 namespace common = mindspore::common;
34 
35 using namespace mindspore::dataset;
36 using mindspore::LogStream;
37 using mindspore::ExceptionType::NoExceptionType;
38 using mindspore::MsLogLevel::ERROR;
39 
Manifest(int32_t num_works,int32_t rows,int32_t conns,const std::string & file,std::string usage="train",std::shared_ptr<SamplerRT> sampler=nullptr,std::map<std::string,int32_t> map={},bool decode=false)40 std::shared_ptr<ManifestOp> Manifest(int32_t num_works, int32_t rows, int32_t conns, const std::string &file,
41                                      std::string usage = "train", std::shared_ptr<SamplerRT> sampler = nullptr,
42                                      std::map<std::string, int32_t> map = {}, bool decode = false) {
43   if (sampler == nullptr) {
44     const int64_t num_samples = 0;
45     const int64_t start_index = 0;
46     sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples);
47   }
48   auto schema = std::make_unique<DataSchema>();
49   schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1));
50   schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1));
51   return std::make_shared<ManifestOp>(num_works, file, conns, decode, map, std::move(schema), std::move(sampler),
52                                       usage);
53 }
54 
55 class MindDataTestManifest : public UT::DatasetOpTesting {
56  protected:
57 };
58 
// Runs Manifest -> Repeat(2) with the default sequential sampler and checks that the
// labels arrive in the order 0, 1, 0, 1 and that exactly four rows are produced.
TEST_F(MindDataTestManifest, TestSequentialManifestWithRepeat) {
  std::string file = datasets_root_path_ + "/testManifestData/cpp.json";
  auto op1 = Manifest(16, 2, 32, file);
  auto op2 = Repeat(2);
  op1->SetTotalRepeats(2);
  op1->SetNumRepeatsPerEpoch(2);
  auto tree = Build({op1, op2});
  ASSERT_OK(tree->Prepare());

  // Signed to match the type the label is read back as, so the comparison is not mixed-sign.
  const int32_t expected_labels[] = {0, 1, 0, 1};
  const uint64_t expected_rows = sizeof(expected_labels) / sizeof(expected_labels[0]);

  Status rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << ".";
    FAIL();
  }

  DatasetIterator di(tree);
  TensorMap tensor_map;
  ASSERT_OK(di.GetNextAsMap(&tensor_map));
  uint64_t i = 0;
  int32_t label = 0;
  // An empty map marks the end of the data.
  while (tensor_map.size() != 0) {
    // Stop before indexing past the expectation table if the pipeline yields extra rows.
    ASSERT_LT(i, expected_rows);
    ASSERT_OK(tensor_map["label"]->GetItemAt<int32_t>(&label, {}));
    EXPECT_EQ(expected_labels[i], label);
    MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n";
    i++;
    ASSERT_OK(di.GetNextAsMap(&tensor_map));
  }
  EXPECT_EQ(i, expected_rows);
}
89 
// Restricts the Manifest dataset to index 1 through a SubsetRandomSamplerRT and checks
// that exactly one row is produced and that its label is 1.
TEST_F(MindDataTestManifest, TestSubsetRandomSamplerManifest) {
  std::vector<int64_t> indices({1});
  int64_t num_samples = 0;
  std::shared_ptr<SamplerRT> sampler = std::make_shared<SubsetRandomSamplerRT>(indices, num_samples);
  std::string file = datasets_root_path_ + "/testManifestData/cpp.json";
  // Only index 1 is sampled, so a single row with label 1 is expected.
  auto tree = Build({Manifest(16, 2, 32, file, "train", std::move(sampler))});
  ASSERT_OK(tree->Prepare());

  Status rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << ".";
    FAIL();
  }

  DatasetIterator di(tree);
  TensorMap tensor_map;
  ASSERT_OK(di.GetNextAsMap(&tensor_map));
  const uint64_t expected_rows = 1;
  uint64_t i = 0;
  int32_t label = 0;
  // An empty map marks the end of the data.
  while (tensor_map.size() != 0) {
    ASSERT_OK(tensor_map["label"]->GetItemAt<int32_t>(&label, {}));
    // Check the row before fetching the next one so a failed fetch cannot skip the check.
    EXPECT_EQ(label, 1);
    i++;
    ASSERT_OK(di.GetNextAsMap(&tensor_map));
  }
  EXPECT_EQ(i, expected_rows);
}
118 
// Supplies an explicit class-name -> label mapping and checks that the two rows read
// back carry the mapped labels 111 and 222, in that order.
TEST_F(MindDataTestManifest, MindDataTestManifestClassIndex) {
  std::string file = datasets_root_path_ + "/testManifestData/cpp.json";
  std::map<std::string, int32_t> map;
  map["cat"] = 111;  // label expected on the first row
  map["dog"] = 222;  // label expected on the second row
  map["wrong folder name"] = 1234;  // extra entry; the expectations below assume it yields no rows
  auto tree = Build({Manifest(16, 2, 32, file, "train", nullptr, map)});

  // Signed to match the type the label is read back as, so the comparison is not mixed-sign.
  const int32_t expected_labels[] = {111, 222};
  const uint64_t expected_rows = sizeof(expected_labels) / sizeof(expected_labels[0]);

  ASSERT_OK(tree->Prepare());
  Status rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << ".";
    FAIL();
  }

  DatasetIterator di(tree);
  TensorMap tensor_map;
  ASSERT_OK(di.GetNextAsMap(&tensor_map));
  uint64_t i = 0;
  int32_t label = 0;
  // An empty map marks the end of the data.
  while (tensor_map.size() != 0) {
    // Stop before indexing past the expectation table if the pipeline yields extra rows.
    ASSERT_LT(i, expected_rows);
    ASSERT_OK(tensor_map["label"]->GetItemAt<int32_t>(&label, {}));
    EXPECT_EQ(label, expected_labels[i]);
    MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n";
    i++;
    ASSERT_OK(di.GetNextAsMap(&tensor_map));
  }
  EXPECT_EQ(i, expected_rows);
}
149 
// Uses a sequential sampler built with num_samples = 1 under Repeat(4) and checks
// that four rows are produced, each with label 0.
TEST_F(MindDataTestManifest, MindDataTestManifestNumSamples) {
  std::string file = datasets_root_path_ + "/testManifestData/cpp.json";
  int64_t num_samples = 1;
  int64_t start_index = 0;
  auto seq_sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples);
  auto op1 = Manifest(16, 2, 32, file, "train", std::move(seq_sampler), {});
  auto op2 = Repeat(4);
  op1->SetTotalRepeats(4);
  op1->SetNumRepeatsPerEpoch(4);
  auto tree = Build({op1, op2});
  ASSERT_OK(tree->Prepare());

  Status rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << ".";
    FAIL();
  }

  DatasetIterator di(tree);
  TensorMap tensor_map;
  ASSERT_OK(di.GetNextAsMap(&tensor_map));
  const uint64_t expected_rows = 4;
  uint64_t i = 0;
  int32_t label = 0;
  // An empty map marks the end of the data.
  while (tensor_map.size() != 0) {
    ASSERT_OK(tensor_map["label"]->GetItemAt<int32_t>(&label, {}));
    EXPECT_EQ(0, label);
    MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n";
    i++;
    ASSERT_OK(di.GetNextAsMap(&tensor_map));
  }
  EXPECT_EQ(i, expected_rows);
}
182 
// Reads the manifest with usage "eval" through a sequential sampler built with
// num_samples = 1 and checks that exactly one row with label 0 is produced.
TEST_F(MindDataTestManifest, MindDataTestManifestEval) {
  std::string file = datasets_root_path_ + "/testManifestData/cpp.json";
  int64_t num_samples = 1;
  int64_t start_index = 0;
  auto seq_sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples);
  auto tree = Build({Manifest(16, 2, 32, file, "eval", std::move(seq_sampler), {})});
  ASSERT_OK(tree->Prepare());

  Status rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << ".";
    FAIL();
  }

  DatasetIterator di(tree);
  TensorMap tensor_map;
  ASSERT_OK(di.GetNextAsMap(&tensor_map));
  const uint64_t expected_rows = 1;
  uint64_t i = 0;
  int32_t label = 0;
  // An empty map marks the end of the data.
  while (tensor_map.size() != 0) {
    ASSERT_OK(tensor_map["label"]->GetItemAt<int32_t>(&label, {}));
    EXPECT_EQ(0, label);
    MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n";
    i++;
    ASSERT_OK(di.GetNextAsMap(&tensor_map));
  }
  EXPECT_EQ(i, expected_rows);
}
211