1 /**
2 * Copyright 2019-2021 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include "minddata/dataset/engine/datasetops/source/voc_op.h"
17
18 #include <algorithm>
19 #include <fstream>
20
21 #include "minddata/dataset/core/config_manager.h"
22 #include "minddata/dataset/core/tensor_shape.h"
23 #include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h"
24 #include "minddata/dataset/engine/execution_tree.h"
25 #include "utils/file_utils.h"
26 #include "utils/ms_utils.h"
27
28 namespace mindspore {
29 namespace dataset {
// Names of the output columns.
const char kColumnImage[] = "image";
const char kColumnTarget[] = "target";
const char kColumnBbox[] = "bbox";
const char kColumnLabel[] = "label";
const char kColumnDifficult[] = "difficult";
const char kColumnTruncate[] = "truncate";

// Sub-directories, relative to the dataset folder, that are joined with an image id to build file paths.
const char kJPEGImagesFolder[] = "/JPEGImages/";
const char kSegmentationClassFolder[] = "/SegmentationClass/";
const char kAnnotationsFolder[] = "/Annotations/";
const char kImageSetsSegmentation[] = "/ImageSets/Segmentation/";
const char kImageSetsMain[] = "/ImageSets/Main/";

// File extensions appended to an image id (or to the usage name for the ImageSets list).
const char kImageExtension[] = ".jpg";
const char kSegmentationExtension[] = ".png";
const char kAnnotationExtension[] = ".xml";
const char kImageSetsExtension[] = ".txt";
45
46 #ifdef ENABLE_PYTHON
VOCOp(const TaskType & task_type,const std::string & task_mode,const std::string & folder_path,const std::map<std::string,int32_t> & class_index,int32_t num_workers,int32_t queue_size,bool decode,std::unique_ptr<DataSchema> data_schema,std::shared_ptr<SamplerRT> sampler,bool extra_metadata,py::function decrypt)47 VOCOp::VOCOp(const TaskType &task_type, const std::string &task_mode, const std::string &folder_path,
48 const std::map<std::string, int32_t> &class_index, int32_t num_workers, int32_t queue_size, bool decode,
49 std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler, bool extra_metadata,
50 py::function decrypt)
51 : MappableLeafOp(num_workers, queue_size, std::move(sampler)),
52 decode_(decode),
53 row_cnt_(0),
54 task_type_(task_type),
55 usage_(task_mode),
56 folder_path_(folder_path),
57 class_index_(class_index),
58 data_schema_(std::move(data_schema)),
59 extra_metadata_(extra_metadata),
60 decrypt_(std::move(decrypt)) {}
61 #else
VOCOp(const TaskType & task_type,const std::string & task_mode,const std::string & folder_path,const std::map<std::string,int32_t> & class_index,int32_t num_workers,int32_t queue_size,bool decode,std::unique_ptr<DataSchema> data_schema,std::shared_ptr<SamplerRT> sampler,bool extra_metadata)62 VOCOp::VOCOp(const TaskType &task_type, const std::string &task_mode, const std::string &folder_path,
63 const std::map<std::string, int32_t> &class_index, int32_t num_workers, int32_t queue_size, bool decode,
64 std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler, bool extra_metadata)
65 : MappableLeafOp(num_workers, queue_size, std::move(sampler)),
66 decode_(decode),
67 row_cnt_(0),
68 task_type_(task_type),
69 usage_(task_mode),
70 folder_path_(folder_path),
71 class_index_(class_index),
72 data_schema_(std::move(data_schema)),
73 extra_metadata_(extra_metadata) {}
74 #endif
75
Print(std::ostream & out,bool show_all) const76 void VOCOp::Print(std::ostream &out, bool show_all) const {
77 if (!show_all) {
78 // Call the super class for displaying any common 1-liner info
79 ParallelOp::Print(out, show_all);
80 // Then show any custom derived-internal 1-liner info for this op
81 out << "\n";
82 } else {
83 // Call the super class for displaying any common detailed info
84 ParallelOp::Print(out, show_all);
85 // Then show any custom derived-internal stuff
86 out << "\nNumber of rows: " << num_rows_ << "\nVOC Directory: " << folder_path_
87 << "\nDecode: " << (decode_ ? "yes" : "no") << "\n\n";
88 }
89 }
90
LoadTensorRow(row_id_type row_id,TensorRow * trow)91 Status VOCOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) {
92 RETURN_UNEXPECTED_IF_NULL(trow);
93 std::string image_id = image_ids_[row_id];
94 std::vector<std::string> path_list;
95 const std::string kImageFile =
96 folder_path_ + std::string(kJPEGImagesFolder) + image_id + std::string(kImageExtension);
97 if (task_type_ == TaskType::Segmentation) {
98 std::shared_ptr<Tensor> image, target;
99 const std::string kTargetFile =
100 folder_path_ + std::string(kSegmentationClassFolder) + image_id + std::string(kSegmentationExtension);
101 RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->Column(0), &image));
102 RETURN_IF_NOT_OK(ReadImageToTensor(kTargetFile, data_schema_->Column(1), &target));
103 (*trow) = TensorRow(row_id, {std::move(image), std::move(target)});
104 path_list = {kImageFile, kTargetFile};
105 } else if (task_type_ == TaskType::Detection) {
106 std::shared_ptr<Tensor> image;
107 TensorRow annotation;
108 const std::string kAnnotationFile =
109 folder_path_ + std::string(kAnnotationsFolder) + image_id + std::string(kAnnotationExtension);
110 RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->Column(0), &image));
111 RETURN_IF_NOT_OK(ReadAnnotationToTensor(kAnnotationFile, &annotation));
112 trow->setId(row_id);
113 trow->push_back(std::move(image));
114 trow->insert(trow->end(), annotation.begin(), annotation.end());
115 path_list = {kImageFile, kAnnotationFile, kAnnotationFile, kAnnotationFile, kAnnotationFile};
116 }
117 if (extra_metadata_) {
118 // Now VOCDataset add a new column named "_meta-filename".
119 std::shared_ptr<Tensor> filename;
120 RETURN_IF_NOT_OK(Tensor::CreateScalar(image_id, &filename));
121 trow->push_back(std::move(filename));
122 path_list.push_back(kImageFile);
123 }
124 trow->setPath(path_list);
125 return Status::OK();
126 }
127
ParseImageIds()128 Status VOCOp::ParseImageIds() {
129 if (!image_ids_.empty()) {
130 return Status::OK();
131 }
132 std::string image_sets_file;
133 if (task_type_ == TaskType::Segmentation) {
134 image_sets_file = folder_path_ + std::string(kImageSetsSegmentation) + usage_ + std::string(kImageSetsExtension);
135 } else if (task_type_ == TaskType::Detection) {
136 image_sets_file = folder_path_ + std::string(kImageSetsMain) + usage_ + std::string(kImageSetsExtension);
137 }
138
139 auto realpath = FileUtils::GetRealPath(image_sets_file.c_str());
140 if (!realpath.has_value()) {
141 MS_LOG(ERROR) << "Invalid file path, " << image_sets_file << " does not exist.";
142 RETURN_STATUS_UNEXPECTED("Invalid file path, " + image_sets_file + " does not exist.");
143 }
144
145 std::ifstream in_file;
146 in_file.open(realpath.value(), std::ios::in);
147 if (in_file.fail()) {
148 RETURN_STATUS_UNEXPECTED("Invalid ImageSets file, failed to open ImageSets file: " + image_sets_file +
149 ", the file is damaged or permission denied.");
150 }
151 std::string id;
152 while (getline(in_file, id)) {
153 if (id.size() > 0 && id[id.size() - 1] == '\r') {
154 image_ids_.push_back(id.substr(0, id.size() - 1));
155 } else {
156 image_ids_.push_back(id);
157 }
158 }
159 in_file.close();
160 image_ids_.shrink_to_fit();
161 num_rows_ = image_ids_.size();
162 return Status::OK();
163 }
164
ParseAnnotationIds()165 Status VOCOp::ParseAnnotationIds() {
166 std::vector<std::string> new_image_ids;
167 for (auto id : image_ids_) {
168 const std::string annotation_name =
169 folder_path_ + std::string(kAnnotationsFolder) + id + std::string(kAnnotationExtension);
170 RETURN_IF_NOT_OK(ParseAnnotationBbox(annotation_name));
171 if (annotation_map_.find(annotation_name) != annotation_map_.end()) {
172 new_image_ids.push_back(id);
173 }
174 }
175
176 if (image_ids_.size() != new_image_ids.size()) {
177 image_ids_.clear();
178 image_ids_.insert(image_ids_.end(), new_image_ids.begin(), new_image_ids.end());
179 }
180 uint32_t count = 0;
181 for (auto &label : label_index_) {
182 label.second = count++;
183 }
184
185 num_rows_ = image_ids_.size();
186 if (num_rows_ == 0) {
187 RETURN_STATUS_UNEXPECTED(
188 "Invalid data, VOCDataset API can't read the data file (interface mismatch or no data found). "
189 "Check file in directory:" +
190 folder_path_);
191 }
192 return Status::OK();
193 }
194
ParseNodeValue(XMLElement * bbox_node,const char * name,float * value)195 void VOCOp::ParseNodeValue(XMLElement *bbox_node, const char *name, float *value) {
196 *value = 0.0;
197 if (bbox_node != nullptr) {
198 XMLElement *node = bbox_node->FirstChildElement(name);
199 if (node != nullptr) {
200 *value = node->FloatText();
201 }
202 }
203 }
204
CheckIfBboxValid(const float & xmin,const float & ymin,const float & xmax,const float & ymax,const std::string & path)205 Status VOCOp::CheckIfBboxValid(const float &xmin, const float &ymin, const float &xmax, const float &ymax,
206 const std::string &path) {
207 if (!(xmin > 0 && ymin > 0 && xmax > xmin && ymax > ymin)) {
208 std::string invalid_bbox = "{" + std::to_string(static_cast<int>(xmin)) + ", " +
209 std::to_string(static_cast<int>(ymin)) + ", " + std::to_string(static_cast<int>(xmax)) +
210 ", " + std::to_string(static_cast<int>(ymax)) + "}";
211 RETURN_STATUS_UNEXPECTED("Invalid bndbox, the coordinate of bndbox in " + path +
212 " should be greater than 0, but got " + invalid_bbox);
213 }
214 return Status::OK();
215 }
216
ParseAnnotationBbox(const std::string & path)217 Status VOCOp::ParseAnnotationBbox(const std::string &path) {
218 if (!Path(path).Exists()) {
219 RETURN_STATUS_UNEXPECTED("Invalid file path, " + path + " does not exist.");
220 }
221 Annotation annotation;
222 XMLDocument doc;
223 XMLError e = doc.LoadFile(common::SafeCStr(path));
224 if (e != XMLError::XML_SUCCESS) {
225 RETURN_STATUS_UNEXPECTED("Invalid xml, failed to load " + path + ": the xml file is damaged or incorrect format.");
226 }
227 XMLElement *root = doc.RootElement();
228 if (root == nullptr) {
229 RETURN_STATUS_UNEXPECTED("Invalid xml, failed to load root element of " + path +
230 ": the format of xml file is incorrect.");
231 }
232 XMLElement *object = root->FirstChildElement("object");
233 if (object == nullptr) {
234 RETURN_STATUS_UNEXPECTED("Invalid xml, the node of object is missing in " + path + ".");
235 }
236 while (object != nullptr) {
237 std::string label_name;
238 float xmin = 0.0, ymin = 0.0, xmax = 0.0, ymax = 0.0, truncated = 0.0, difficult = 0.0;
239 XMLElement *name_node = object->FirstChildElement("name");
240 if (name_node != nullptr && name_node->GetText() != nullptr) {
241 label_name = name_node->GetText();
242 }
243 ParseNodeValue(object, "difficult", &difficult);
244 ParseNodeValue(object, "truncated", &truncated);
245
246 XMLElement *bbox_node = object->FirstChildElement("bndbox");
247 if (bbox_node != nullptr) {
248 ParseNodeValue(bbox_node, "xmin", &xmin);
249 ParseNodeValue(bbox_node, "xmax", &xmax);
250 ParseNodeValue(bbox_node, "ymin", &ymin);
251 ParseNodeValue(bbox_node, "ymax", &ymax);
252 RETURN_IF_NOT_OK(CheckIfBboxValid(xmin, ymin, xmax, ymax, path));
253 } else {
254 RETURN_STATUS_UNEXPECTED("Invalid xml, the node of bndbox is missing in " + path);
255 }
256
257 if (label_name != "" && (class_index_.empty() || class_index_.find(label_name) != class_index_.end()) && xmin > 0 &&
258 ymin > 0 && xmax > xmin && ymax > ymin) {
259 std::vector<float> bbox_list = {xmin, ymin, xmax - xmin, ymax - ymin, difficult, truncated};
260 annotation.emplace_back(std::make_pair(label_name, bbox_list));
261 label_index_[label_name] = 0;
262 }
263 object = object->NextSiblingElement("object");
264 }
265 if (annotation.size() > 0) {
266 annotation_map_[path] = annotation;
267 }
268 return Status::OK();
269 }
270
PrepareData()271 Status VOCOp::PrepareData() {
272 RETURN_IF_NOT_OK(this->ParseImageIds());
273 if (task_type_ == TaskType::Detection) {
274 RETURN_IF_NOT_OK(this->ParseAnnotationIds());
275 }
276 return Status::OK();
277 }
278
ReadImageToTensor(const std::string & path,const ColDescriptor & col,std::shared_ptr<Tensor> * tensor)279 Status VOCOp::ReadImageToTensor(const std::string &path, const ColDescriptor &col, std::shared_ptr<Tensor> *tensor) {
280 #ifdef ENABLE_PYTHON
281 RETURN_IF_NOT_OK(MappableLeafOp::ImageDecrypt(path, tensor, decrypt_));
282 #else
283 RETURN_IF_NOT_OK(Tensor::CreateFromFile(path, tensor));
284 #endif
285 if (decode_ == true) {
286 Status rc = Decode(*tensor, tensor);
287 if (rc.IsError()) {
288 RETURN_STATUS_UNEXPECTED("Invalid image, failed to decode " + path +
289 ": the image is damaged or permission denied.");
290 }
291 }
292 return Status::OK();
293 }
294
295 // When task is Detection, user can get bbox data with four columns:
296 // column ["bbox"] with datatype=float32
297 // column ["label"] with datatype=uint32
298 // column ["difficult"] with datatype=uint32
299 // column ["truncate"] with datatype=uint32
ReadAnnotationToTensor(const std::string & path,TensorRow * row)300 Status VOCOp::ReadAnnotationToTensor(const std::string &path, TensorRow *row) {
301 Annotation annotation = annotation_map_[path];
302 std::shared_ptr<Tensor> bbox, label, difficult, truncate;
303 std::vector<float> bbox_data;
304 std::vector<uint32_t> label_data, difficult_data, truncate_data;
305 dsize_t bbox_num = 0;
306 for (auto item : annotation) {
307 if (label_index_.find(item.first) != label_index_.end()) {
308 if (class_index_.find(item.first) != class_index_.end()) {
309 label_data.push_back(static_cast<uint32_t>(class_index_[item.first]));
310 } else {
311 label_data.push_back(static_cast<uint32_t>(label_index_[item.first]));
312 }
313 CHECK_FAIL_RETURN_UNEXPECTED(
314 item.second.size() == 6,
315 "[Internal ERROR] annotation only support 6 parameters, but got " + std::to_string(item.second.size()));
316
317 std::vector<float> tmp_bbox = {(item.second)[0], (item.second)[1], (item.second)[2], (item.second)[3]};
318 bbox_data.insert(bbox_data.end(), tmp_bbox.begin(), tmp_bbox.end());
319 difficult_data.push_back(static_cast<uint32_t>((item.second)[4]));
320 truncate_data.push_back(static_cast<uint32_t>((item.second)[5]));
321 bbox_num++;
322 }
323 }
324 RETURN_IF_NOT_OK(Tensor::CreateFromVector(bbox_data, TensorShape({bbox_num, 4}), &bbox));
325 RETURN_IF_NOT_OK(Tensor::CreateFromVector(label_data, TensorShape({bbox_num, 1}), &label));
326 RETURN_IF_NOT_OK(Tensor::CreateFromVector(difficult_data, TensorShape({bbox_num, 1}), &difficult));
327 RETURN_IF_NOT_OK(Tensor::CreateFromVector(truncate_data, TensorShape({bbox_num, 1}), &truncate));
328 (*row) = TensorRow({std::move(bbox), std::move(label), std::move(difficult), std::move(truncate)});
329 return Status::OK();
330 }
331
CountTotalRows(int64_t * count)332 Status VOCOp::CountTotalRows(int64_t *count) {
333 RETURN_UNEXPECTED_IF_NULL(count);
334 switch (task_type_) {
335 case TaskType::Detection:
336 RETURN_IF_NOT_OK(ParseImageIds());
337 RETURN_IF_NOT_OK(ParseAnnotationIds());
338 break;
339 case TaskType::Segmentation:
340 RETURN_IF_NOT_OK(ParseImageIds());
341 break;
342 }
343 *count = static_cast<int64_t>(image_ids_.size());
344 return Status::OK();
345 }
346
ComputeColMap()347 Status VOCOp::ComputeColMap() {
348 // Set the column name map (base class field)
349 if (column_name_id_map_.empty()) {
350 for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
351 column_name_id_map_[data_schema_->Column(i).Name()] = i;
352 }
353 } else {
354 MS_LOG(WARNING) << "Column name map is already set!";
355 }
356 return Status::OK();
357 }
358
GetClassIndexing(std::vector<std::pair<std::string,std::vector<int32_t>>> * output_class_indexing)359 Status VOCOp::GetClassIndexing(std::vector<std::pair<std::string, std::vector<int32_t>>> *output_class_indexing) {
360 RETURN_UNEXPECTED_IF_NULL(output_class_indexing);
361 if ((*output_class_indexing).empty()) {
362 if (task_type_ != TaskType::Detection) {
363 MS_LOG(ERROR) << "Invalid task, only 'Detection' task support GetClassIndexing.";
364 RETURN_STATUS_UNEXPECTED("Invalid task, only 'Detection' task support GetClassIndexing.");
365 }
366 RETURN_IF_NOT_OK(ParseImageIds());
367 RETURN_IF_NOT_OK(ParseAnnotationIds());
368 for (const auto &label : label_index_) {
369 if (!class_index_.empty()) {
370 (*output_class_indexing)
371 .emplace_back(std::make_pair(label.first, std::vector<int32_t>(1, class_index_[label.first])));
372 } else {
373 (*output_class_indexing).emplace_back(std::make_pair(label.first, std::vector<int32_t>(1, label.second)));
374 }
375 }
376 }
377 return Status::OK();
378 }
379 } // namespace dataset
380 } // namespace mindspore
381