/**
 * Copyright 2019-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 #include "minddata/dataset/engine/datasetops/source/voc_op.h"
17
18 #include <algorithm>
19 #include <fstream>
20
21 #include "utils/file_utils.h"
22 #include "minddata/dataset/core/config_manager.h"
23 #include "minddata/dataset/core/tensor_shape.h"
24 #include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h"
25 #include "minddata/dataset/engine/db_connector.h"
26 #include "minddata/dataset/engine/execution_tree.h"
27 #include "utils/ms_utils.h"
28
29 namespace mindspore {
30 namespace dataset {
// Column name strings.
const char kColumnImage[] = "image";
const char kColumnTarget[] = "target";
const char kColumnBbox[] = "bbox";
const char kColumnLabel[] = "label";
const char kColumnDifficult[] = "difficult";
const char kColumnTruncate[] = "truncate";

// Sub-directories of the dataset folder. They carry both the leading and the trailing '/'
// because they are concatenated directly between the folder path and a file name.
const char kJPEGImagesFolder[] = "/JPEGImages/";
const char kSegmentationClassFolder[] = "/SegmentationClass/";
const char kAnnotationsFolder[] = "/Annotations/";
const char kImageSetsSegmentation[] = "/ImageSets/Segmentation/";
const char kImageSetsMain[] = "/ImageSets/Main/";

// File extensions appended to an image id (or to the usage name for image-set lists).
const char kImageExtension[] = ".jpg";
const char kSegmentationExtension[] = ".png";
const char kAnnotationExtension[] = ".xml";
const char kImageSetsExtension[] = ".txt";
46
VOCOp(const TaskType & task_type,const std::string & task_mode,const std::string & folder_path,const std::map<std::string,int32_t> & class_index,int32_t num_workers,int32_t queue_size,bool decode,std::unique_ptr<DataSchema> data_schema,std::shared_ptr<SamplerRT> sampler,bool extra_metadata)47 VOCOp::VOCOp(const TaskType &task_type, const std::string &task_mode, const std::string &folder_path,
48 const std::map<std::string, int32_t> &class_index, int32_t num_workers, int32_t queue_size, bool decode,
49 std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler, bool extra_metadata)
50 : MappableLeafOp(num_workers, queue_size, std::move(sampler)),
51 decode_(decode),
52 row_cnt_(0),
53 task_type_(task_type),
54 usage_(task_mode),
55 folder_path_(folder_path),
56 class_index_(class_index),
57 data_schema_(std::move(data_schema)),
58 extra_metadata_(extra_metadata) {
59 io_block_queues_.Init(num_workers_, queue_size);
60 }
61
Print(std::ostream & out,bool show_all) const62 void VOCOp::Print(std::ostream &out, bool show_all) const {
63 if (!show_all) {
64 // Call the super class for displaying any common 1-liner info
65 ParallelOp::Print(out, show_all);
66 // Then show any custom derived-internal 1-liner info for this op
67 out << "\n";
68 } else {
69 // Call the super class for displaying any common detailed info
70 ParallelOp::Print(out, show_all);
71 // Then show any custom derived-internal stuff
72 out << "\nNumber of rows: " << num_rows_ << "\nVOC Directory: " << folder_path_
73 << "\nDecode: " << (decode_ ? "yes" : "no") << "\n\n";
74 }
75 }
76
LoadTensorRow(row_id_type row_id,TensorRow * trow)77 Status VOCOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) {
78 std::string image_id = image_ids_[row_id];
79 std::vector<std::string> path_list;
80 const std::string kImageFile =
81 folder_path_ + std::string(kJPEGImagesFolder) + image_id + std::string(kImageExtension);
82 if (task_type_ == TaskType::Segmentation) {
83 std::shared_ptr<Tensor> image, target;
84 const std::string kTargetFile =
85 folder_path_ + std::string(kSegmentationClassFolder) + image_id + std::string(kSegmentationExtension);
86 RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->Column(0), &image));
87 RETURN_IF_NOT_OK(ReadImageToTensor(kTargetFile, data_schema_->Column(1), &target));
88 (*trow) = TensorRow(row_id, {std::move(image), std::move(target)});
89 path_list = {kImageFile, kTargetFile};
90 } else if (task_type_ == TaskType::Detection) {
91 std::shared_ptr<Tensor> image;
92 TensorRow annotation;
93 const std::string kAnnotationFile =
94 folder_path_ + std::string(kAnnotationsFolder) + image_id + std::string(kAnnotationExtension);
95 RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->Column(0), &image));
96 RETURN_IF_NOT_OK(ReadAnnotationToTensor(kAnnotationFile, &annotation));
97 trow->setId(row_id);
98 trow->push_back(std::move(image));
99 trow->insert(trow->end(), annotation.begin(), annotation.end());
100 path_list = {kImageFile, kAnnotationFile, kAnnotationFile, kAnnotationFile, kAnnotationFile};
101 }
102 if (extra_metadata_) {
103 // Now VOCDataset add a new column named "_meta-filename".
104 std::shared_ptr<Tensor> filename;
105 RETURN_IF_NOT_OK(Tensor::CreateScalar(image_id, &filename));
106 trow->push_back(std::move(filename));
107 path_list.push_back(kImageFile);
108 }
109 trow->setPath(path_list);
110 return Status::OK();
111 }
112
ParseImageIds()113 Status VOCOp::ParseImageIds() {
114 if (!image_ids_.empty()) return Status::OK();
115 std::string image_sets_file;
116 if (task_type_ == TaskType::Segmentation) {
117 image_sets_file = folder_path_ + std::string(kImageSetsSegmentation) + usage_ + std::string(kImageSetsExtension);
118 } else if (task_type_ == TaskType::Detection) {
119 image_sets_file = folder_path_ + std::string(kImageSetsMain) + usage_ + std::string(kImageSetsExtension);
120 }
121
122 auto realpath = FileUtils::GetRealPath(image_sets_file.data());
123 if (!realpath.has_value()) {
124 MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << image_sets_file;
125 RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + image_sets_file);
126 }
127
128 std::ifstream in_file;
129 in_file.open(realpath.value());
130 if (in_file.fail()) {
131 RETURN_STATUS_UNEXPECTED("Invalid file, failed to open file: " + image_sets_file);
132 }
133 std::string id;
134 while (getline(in_file, id)) {
135 if (id.size() > 0 && id[id.size() - 1] == '\r') {
136 image_ids_.push_back(id.substr(0, id.size() - 1));
137 } else {
138 image_ids_.push_back(id);
139 }
140 }
141 in_file.close();
142 image_ids_.shrink_to_fit();
143 num_rows_ = image_ids_.size();
144 return Status::OK();
145 }
146
ParseAnnotationIds()147 Status VOCOp::ParseAnnotationIds() {
148 std::vector<std::string> new_image_ids;
149 for (auto id : image_ids_) {
150 const std::string annotation_name =
151 folder_path_ + std::string(kAnnotationsFolder) + id + std::string(kAnnotationExtension);
152 RETURN_IF_NOT_OK(ParseAnnotationBbox(annotation_name));
153 if (annotation_map_.find(annotation_name) != annotation_map_.end()) {
154 new_image_ids.push_back(id);
155 }
156 }
157
158 if (image_ids_.size() != new_image_ids.size()) {
159 image_ids_.clear();
160 image_ids_.insert(image_ids_.end(), new_image_ids.begin(), new_image_ids.end());
161 }
162 uint32_t count = 0;
163 for (auto &label : label_index_) {
164 label.second = count++;
165 }
166
167 num_rows_ = image_ids_.size();
168 if (num_rows_ == 0) {
169 RETURN_STATUS_UNEXPECTED(
170 "Invalid data, VOCDataset API can't read the data file (interface mismatch or no data found). "
171 "Check file in directory:" +
172 folder_path_);
173 }
174 return Status::OK();
175 }
176
ParseNodeValue(XMLElement * bbox_node,const char * name,float * value)177 void VOCOp::ParseNodeValue(XMLElement *bbox_node, const char *name, float *value) {
178 *value = 0.0;
179 if (bbox_node != nullptr) {
180 XMLElement *node = bbox_node->FirstChildElement(name);
181 if (node != nullptr) {
182 *value = node->FloatText();
183 }
184 }
185 }
186
CheckIfBboxValid(const float & xmin,const float & ymin,const float & xmax,const float & ymax,const std::string & path)187 Status VOCOp::CheckIfBboxValid(const float &xmin, const float &ymin, const float &xmax, const float &ymax,
188 const std::string &path) {
189 if (!(xmin > 0 && ymin > 0 && xmax > xmin && ymax > ymin)) {
190 std::string invalid_bbox = "{" + std::to_string(static_cast<int>(xmin)) + ", " +
191 std::to_string(static_cast<int>(ymin)) + ", " + std::to_string(static_cast<int>(xmax)) +
192 ", " + std::to_string(static_cast<int>(ymax)) + "}";
193 RETURN_STATUS_UNEXPECTED("Invalid bndbox: " + invalid_bbox + " found in " + path);
194 }
195 return Status::OK();
196 }
197
ParseAnnotationBbox(const std::string & path)198 Status VOCOp::ParseAnnotationBbox(const std::string &path) {
199 if (!Path(path).Exists()) {
200 RETURN_STATUS_UNEXPECTED("Invalid file, failed to open file: " + path);
201 }
202 Annotation annotation;
203 XMLDocument doc;
204 XMLError e = doc.LoadFile(common::SafeCStr(path));
205 if (e != XMLError::XML_SUCCESS) {
206 RETURN_STATUS_UNEXPECTED("Invalid file, failed to load xml file: " + path);
207 }
208 XMLElement *root = doc.RootElement();
209 if (root == nullptr) {
210 RETURN_STATUS_UNEXPECTED("Invalid data, failed to load root element for xml file.");
211 }
212 XMLElement *object = root->FirstChildElement("object");
213 if (object == nullptr) {
214 RETURN_STATUS_UNEXPECTED("Invalid data, no object found in " + path);
215 }
216 while (object != nullptr) {
217 std::string label_name;
218 float xmin = 0.0, ymin = 0.0, xmax = 0.0, ymax = 0.0, truncated = 0.0, difficult = 0.0;
219 XMLElement *name_node = object->FirstChildElement("name");
220 if (name_node != nullptr && name_node->GetText() != 0) label_name = name_node->GetText();
221 ParseNodeValue(object, "difficult", &difficult);
222 ParseNodeValue(object, "truncated", &truncated);
223
224 XMLElement *bbox_node = object->FirstChildElement("bndbox");
225 if (bbox_node != nullptr) {
226 ParseNodeValue(bbox_node, "xmin", &xmin);
227 ParseNodeValue(bbox_node, "xmax", &xmax);
228 ParseNodeValue(bbox_node, "ymin", &ymin);
229 ParseNodeValue(bbox_node, "ymax", &ymax);
230 RETURN_IF_NOT_OK(CheckIfBboxValid(xmin, ymin, xmax, ymax, path));
231 } else {
232 RETURN_STATUS_UNEXPECTED("Invalid data, bndbox dismatch in " + path);
233 }
234
235 if (label_name != "" && (class_index_.empty() || class_index_.find(label_name) != class_index_.end()) && xmin > 0 &&
236 ymin > 0 && xmax > xmin && ymax > ymin) {
237 std::vector<float> bbox_list = {xmin, ymin, xmax - xmin, ymax - ymin, difficult, truncated};
238 annotation.emplace_back(std::make_pair(label_name, bbox_list));
239 label_index_[label_name] = 0;
240 }
241 object = object->NextSiblingElement("object");
242 }
243 if (annotation.size() > 0) {
244 annotation_map_[path] = annotation;
245 }
246 return Status::OK();
247 }
248
LaunchThreadsAndInitOp()249 Status VOCOp::LaunchThreadsAndInitOp() {
250 if (tree_ == nullptr) {
251 RETURN_STATUS_UNEXPECTED("Pipeline init failed, Execution tree not set.");
252 }
253 RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks()));
254 RETURN_IF_NOT_OK(wait_for_workers_post_.Register(tree_->AllTasks()));
255 RETURN_IF_NOT_OK(
256 tree_->LaunchWorkers(num_workers_, std::bind(&VOCOp::WorkerEntry, this, std::placeholders::_1), "", id()));
257 TaskManager::FindMe()->Post();
258 RETURN_IF_NOT_OK(this->ParseImageIds());
259 if (task_type_ == TaskType::Detection) {
260 RETURN_IF_NOT_OK(this->ParseAnnotationIds());
261 }
262 RETURN_IF_NOT_OK(this->InitSampler());
263 return Status::OK();
264 }
265
ReadImageToTensor(const std::string & path,const ColDescriptor & col,std::shared_ptr<Tensor> * tensor)266 Status VOCOp::ReadImageToTensor(const std::string &path, const ColDescriptor &col, std::shared_ptr<Tensor> *tensor) {
267 RETURN_IF_NOT_OK(Tensor::CreateFromFile(path, tensor));
268 if (decode_ == true) {
269 Status rc = Decode(*tensor, tensor);
270 if (rc.IsError()) {
271 RETURN_STATUS_UNEXPECTED("Invalid data, failed to decode image: " + path);
272 }
273 }
274 return Status::OK();
275 }
276
277 // When task is Detection, user can get bbox data with four columns:
278 // column ["bbox"] with datatype=float32
279 // column ["label"] with datatype=uint32
280 // column ["difficult"] with datatype=uint32
281 // column ["truncate"] with datatype=uint32
ReadAnnotationToTensor(const std::string & path,TensorRow * row)282 Status VOCOp::ReadAnnotationToTensor(const std::string &path, TensorRow *row) {
283 Annotation annotation = annotation_map_[path];
284 std::shared_ptr<Tensor> bbox, label, difficult, truncate;
285 std::vector<float> bbox_data;
286 std::vector<uint32_t> label_data, difficult_data, truncate_data;
287 dsize_t bbox_num = 0;
288 for (auto item : annotation) {
289 if (label_index_.find(item.first) != label_index_.end()) {
290 if (class_index_.find(item.first) != class_index_.end()) {
291 label_data.push_back(static_cast<uint32_t>(class_index_[item.first]));
292 } else {
293 label_data.push_back(static_cast<uint32_t>(label_index_[item.first]));
294 }
295 CHECK_FAIL_RETURN_UNEXPECTED(
296 item.second.size() == 6,
297 "Invalid parameter, annotation only support 6 parameters, but got " + std::to_string(item.second.size()));
298
299 std::vector<float> tmp_bbox = {(item.second)[0], (item.second)[1], (item.second)[2], (item.second)[3]};
300 bbox_data.insert(bbox_data.end(), tmp_bbox.begin(), tmp_bbox.end());
301 difficult_data.push_back(static_cast<uint32_t>((item.second)[4]));
302 truncate_data.push_back(static_cast<uint32_t>((item.second)[5]));
303 bbox_num++;
304 }
305 }
306 RETURN_IF_NOT_OK(Tensor::CreateFromVector(bbox_data, TensorShape({bbox_num, 4}), &bbox));
307 RETURN_IF_NOT_OK(Tensor::CreateFromVector(label_data, TensorShape({bbox_num, 1}), &label));
308 RETURN_IF_NOT_OK(Tensor::CreateFromVector(difficult_data, TensorShape({bbox_num, 1}), &difficult));
309 RETURN_IF_NOT_OK(Tensor::CreateFromVector(truncate_data, TensorShape({bbox_num, 1}), &truncate));
310 (*row) = TensorRow({std::move(bbox), std::move(label), std::move(difficult), std::move(truncate)});
311 return Status::OK();
312 }
313
CountTotalRows(int64_t * count)314 Status VOCOp::CountTotalRows(int64_t *count) {
315 switch (task_type_) {
316 case TaskType::Detection:
317 RETURN_IF_NOT_OK(ParseImageIds());
318 RETURN_IF_NOT_OK(ParseAnnotationIds());
319 break;
320 case TaskType::Segmentation:
321 RETURN_IF_NOT_OK(ParseImageIds());
322 break;
323 }
324 *count = static_cast<int64_t>(image_ids_.size());
325 return Status::OK();
326 }
327
ComputeColMap()328 Status VOCOp::ComputeColMap() {
329 // Set the column name map (base class field)
330 if (column_name_id_map_.empty()) {
331 for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
332 column_name_id_map_[data_schema_->Column(i).Name()] = i;
333 }
334 } else {
335 MS_LOG(WARNING) << "Column name map is already set!";
336 }
337 return Status::OK();
338 }
339
GetClassIndexing(std::vector<std::pair<std::string,std::vector<int32_t>>> * output_class_indexing)340 Status VOCOp::GetClassIndexing(std::vector<std::pair<std::string, std::vector<int32_t>>> *output_class_indexing) {
341 if ((*output_class_indexing).empty()) {
342 if (task_type_ != TaskType::Detection) {
343 MS_LOG(ERROR) << "Invalid parameter, GetClassIndexing only valid in \"Detection\" task.";
344 RETURN_STATUS_UNEXPECTED("Invalid parameter, GetClassIndexing only valid in \"Detection\" task.");
345 }
346 RETURN_IF_NOT_OK(ParseImageIds());
347 RETURN_IF_NOT_OK(ParseAnnotationIds());
348 for (const auto &label : label_index_) {
349 if (!class_index_.empty()) {
350 (*output_class_indexing)
351 .emplace_back(std::make_pair(label.first, std::vector<int32_t>(1, class_index_[label.first])));
352 } else {
353 (*output_class_indexing).emplace_back(std::make_pair(label.first, std::vector<int32_t>(1, label.second)));
354 }
355 }
356 }
357 return Status::OK();
358 }
359 } // namespace dataset
360 } // namespace mindspore
361