• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2019-2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "minddata/dataset/engine/datasetops/source/voc_op.h"
17 
18 #include <algorithm>
19 #include <fstream>
20 
21 #include "minddata/dataset/core/config_manager.h"
22 #include "minddata/dataset/core/tensor_shape.h"
23 #include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h"
24 #include "minddata/dataset/engine/execution_tree.h"
25 #include "utils/file_utils.h"
26 #include "utils/ms_utils.h"
27 
28 namespace mindspore {
29 namespace dataset {
// Column-name constants. They are not referenced by the code visible in this file;
// presumably they are consumed where the data schema is built -- TODO confirm.
const char kColumnImage[] = "image";
const char kColumnTarget[] = "target";
const char kColumnBbox[] = "bbox";
const char kColumnLabel[] = "label";
const char kColumnDifficult[] = "difficult";
const char kColumnTruncate[] = "truncate";
// Sub-directories appended to the dataset root (folder_path_) when building file paths.
// Each one carries both a leading and a trailing '/', so paths are formed by plain concatenation.
const char kJPEGImagesFolder[] = "/JPEGImages/";
const char kSegmentationClassFolder[] = "/SegmentationClass/";
const char kAnnotationsFolder[] = "/Annotations/";
const char kImageSetsSegmentation[] = "/ImageSets/Segmentation/";
const char kImageSetsMain[] = "/ImageSets/Main/";
// File extensions appended after the image id (or after the usage name for ImageSets files).
const char kImageExtension[] = ".jpg";
const char kSegmentationExtension[] = ".png";
const char kAnnotationExtension[] = ".xml";
const char kImageSetsExtension[] = ".txt";
45 
#ifdef ENABLE_PYTHON
// Constructor used when ENABLE_PYTHON is defined.
// Forwards num_workers, queue_size and the sampler to MappableLeafOp and stores the remaining
// arguments in members; `task_mode` is kept as usage_, and the row counter starts at 0.
// `decrypt` is stored in decrypt_ and later passed to ImageDecrypt when image files are read.
VOCOp::VOCOp(const TaskType &task_type, const std::string &task_mode, const std::string &folder_path,
             const std::map<std::string, int32_t> &class_index, int32_t num_workers, int32_t queue_size, bool decode,
             std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler, bool extra_metadata,
             py::function decrypt)
    : MappableLeafOp(num_workers, queue_size, std::move(sampler)),
      decode_(decode),
      row_cnt_(0),
      task_type_(task_type),
      usage_(task_mode),
      folder_path_(folder_path),
      class_index_(class_index),
      data_schema_(std::move(data_schema)),
      extra_metadata_(extra_metadata),
      decrypt_(std::move(decrypt)) {}
#else
// Constructor used when ENABLE_PYTHON is not defined.
// Identical to the variant above except that there is no `decrypt` argument.
VOCOp::VOCOp(const TaskType &task_type, const std::string &task_mode, const std::string &folder_path,
             const std::map<std::string, int32_t> &class_index, int32_t num_workers, int32_t queue_size, bool decode,
             std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler, bool extra_metadata)
    : MappableLeafOp(num_workers, queue_size, std::move(sampler)),
      decode_(decode),
      row_cnt_(0),
      task_type_(task_type),
      usage_(task_mode),
      folder_path_(folder_path),
      class_index_(class_index),
      data_schema_(std::move(data_schema)),
      extra_metadata_(extra_metadata) {}
#endif
75 
Print(std::ostream & out,bool show_all) const76 void VOCOp::Print(std::ostream &out, bool show_all) const {
77   if (!show_all) {
78     // Call the super class for displaying any common 1-liner info
79     ParallelOp::Print(out, show_all);
80     // Then show any custom derived-internal 1-liner info for this op
81     out << "\n";
82   } else {
83     // Call the super class for displaying any common detailed info
84     ParallelOp::Print(out, show_all);
85     // Then show any custom derived-internal stuff
86     out << "\nNumber of rows: " << num_rows_ << "\nVOC Directory: " << folder_path_
87         << "\nDecode: " << (decode_ ? "yes" : "no") << "\n\n";
88   }
89 }
90 
LoadTensorRow(row_id_type row_id,TensorRow * trow)91 Status VOCOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) {
92   RETURN_UNEXPECTED_IF_NULL(trow);
93   std::string image_id = image_ids_[row_id];
94   std::vector<std::string> path_list;
95   const std::string kImageFile =
96     folder_path_ + std::string(kJPEGImagesFolder) + image_id + std::string(kImageExtension);
97   if (task_type_ == TaskType::Segmentation) {
98     std::shared_ptr<Tensor> image, target;
99     const std::string kTargetFile =
100       folder_path_ + std::string(kSegmentationClassFolder) + image_id + std::string(kSegmentationExtension);
101     RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->Column(0), &image));
102     RETURN_IF_NOT_OK(ReadImageToTensor(kTargetFile, data_schema_->Column(1), &target));
103     (*trow) = TensorRow(row_id, {std::move(image), std::move(target)});
104     path_list = {kImageFile, kTargetFile};
105   } else if (task_type_ == TaskType::Detection) {
106     std::shared_ptr<Tensor> image;
107     TensorRow annotation;
108     const std::string kAnnotationFile =
109       folder_path_ + std::string(kAnnotationsFolder) + image_id + std::string(kAnnotationExtension);
110     RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->Column(0), &image));
111     RETURN_IF_NOT_OK(ReadAnnotationToTensor(kAnnotationFile, &annotation));
112     trow->setId(row_id);
113     trow->push_back(std::move(image));
114     trow->insert(trow->end(), annotation.begin(), annotation.end());
115     path_list = {kImageFile, kAnnotationFile, kAnnotationFile, kAnnotationFile, kAnnotationFile};
116   }
117   if (extra_metadata_) {
118     // Now VOCDataset add a new column named "_meta-filename".
119     std::shared_ptr<Tensor> filename;
120     RETURN_IF_NOT_OK(Tensor::CreateScalar(image_id, &filename));
121     trow->push_back(std::move(filename));
122     path_list.push_back(kImageFile);
123   }
124   trow->setPath(path_list);
125   return Status::OK();
126 }
127 
ParseImageIds()128 Status VOCOp::ParseImageIds() {
129   if (!image_ids_.empty()) {
130     return Status::OK();
131   }
132   std::string image_sets_file;
133   if (task_type_ == TaskType::Segmentation) {
134     image_sets_file = folder_path_ + std::string(kImageSetsSegmentation) + usage_ + std::string(kImageSetsExtension);
135   } else if (task_type_ == TaskType::Detection) {
136     image_sets_file = folder_path_ + std::string(kImageSetsMain) + usage_ + std::string(kImageSetsExtension);
137   }
138 
139   auto realpath = FileUtils::GetRealPath(image_sets_file.c_str());
140   if (!realpath.has_value()) {
141     MS_LOG(ERROR) << "Invalid file path, " << image_sets_file << " does not exist.";
142     RETURN_STATUS_UNEXPECTED("Invalid file path, " + image_sets_file + " does not exist.");
143   }
144 
145   std::ifstream in_file;
146   in_file.open(realpath.value(), std::ios::in);
147   if (in_file.fail()) {
148     RETURN_STATUS_UNEXPECTED("Invalid ImageSets file, failed to open ImageSets file: " + image_sets_file +
149                              ", the file is damaged or permission denied.");
150   }
151   std::string id;
152   while (getline(in_file, id)) {
153     if (id.size() > 0 && id[id.size() - 1] == '\r') {
154       image_ids_.push_back(id.substr(0, id.size() - 1));
155     } else {
156       image_ids_.push_back(id);
157     }
158   }
159   in_file.close();
160   image_ids_.shrink_to_fit();
161   num_rows_ = image_ids_.size();
162   return Status::OK();
163 }
164 
ParseAnnotationIds()165 Status VOCOp::ParseAnnotationIds() {
166   std::vector<std::string> new_image_ids;
167   for (auto id : image_ids_) {
168     const std::string annotation_name =
169       folder_path_ + std::string(kAnnotationsFolder) + id + std::string(kAnnotationExtension);
170     RETURN_IF_NOT_OK(ParseAnnotationBbox(annotation_name));
171     if (annotation_map_.find(annotation_name) != annotation_map_.end()) {
172       new_image_ids.push_back(id);
173     }
174   }
175 
176   if (image_ids_.size() != new_image_ids.size()) {
177     image_ids_.clear();
178     image_ids_.insert(image_ids_.end(), new_image_ids.begin(), new_image_ids.end());
179   }
180   uint32_t count = 0;
181   for (auto &label : label_index_) {
182     label.second = count++;
183   }
184 
185   num_rows_ = image_ids_.size();
186   if (num_rows_ == 0) {
187     RETURN_STATUS_UNEXPECTED(
188       "Invalid data, VOCDataset API can't read the data file (interface mismatch or no data found). "
189       "Check file in directory:" +
190       folder_path_);
191   }
192   return Status::OK();
193 }
194 
ParseNodeValue(XMLElement * bbox_node,const char * name,float * value)195 void VOCOp::ParseNodeValue(XMLElement *bbox_node, const char *name, float *value) {
196   *value = 0.0;
197   if (bbox_node != nullptr) {
198     XMLElement *node = bbox_node->FirstChildElement(name);
199     if (node != nullptr) {
200       *value = node->FloatText();
201     }
202   }
203 }
204 
CheckIfBboxValid(const float & xmin,const float & ymin,const float & xmax,const float & ymax,const std::string & path)205 Status VOCOp::CheckIfBboxValid(const float &xmin, const float &ymin, const float &xmax, const float &ymax,
206                                const std::string &path) {
207   if (!(xmin > 0 && ymin > 0 && xmax > xmin && ymax > ymin)) {
208     std::string invalid_bbox = "{" + std::to_string(static_cast<int>(xmin)) + ", " +
209                                std::to_string(static_cast<int>(ymin)) + ", " + std::to_string(static_cast<int>(xmax)) +
210                                ", " + std::to_string(static_cast<int>(ymax)) + "}";
211     RETURN_STATUS_UNEXPECTED("Invalid bndbox, the coordinate of bndbox in " + path +
212                              " should be greater than 0, but got " + invalid_bbox);
213   }
214   return Status::OK();
215 }
216 
ParseAnnotationBbox(const std::string & path)217 Status VOCOp::ParseAnnotationBbox(const std::string &path) {
218   if (!Path(path).Exists()) {
219     RETURN_STATUS_UNEXPECTED("Invalid file path, " + path + " does not exist.");
220   }
221   Annotation annotation;
222   XMLDocument doc;
223   XMLError e = doc.LoadFile(common::SafeCStr(path));
224   if (e != XMLError::XML_SUCCESS) {
225     RETURN_STATUS_UNEXPECTED("Invalid xml, failed to load " + path + ": the xml file is damaged or incorrect format.");
226   }
227   XMLElement *root = doc.RootElement();
228   if (root == nullptr) {
229     RETURN_STATUS_UNEXPECTED("Invalid xml, failed to load root element of " + path +
230                              ": the format of xml file is incorrect.");
231   }
232   XMLElement *object = root->FirstChildElement("object");
233   if (object == nullptr) {
234     RETURN_STATUS_UNEXPECTED("Invalid xml, the node of object is missing in " + path + ".");
235   }
236   while (object != nullptr) {
237     std::string label_name;
238     float xmin = 0.0, ymin = 0.0, xmax = 0.0, ymax = 0.0, truncated = 0.0, difficult = 0.0;
239     XMLElement *name_node = object->FirstChildElement("name");
240     if (name_node != nullptr && name_node->GetText() != nullptr) {
241       label_name = name_node->GetText();
242     }
243     ParseNodeValue(object, "difficult", &difficult);
244     ParseNodeValue(object, "truncated", &truncated);
245 
246     XMLElement *bbox_node = object->FirstChildElement("bndbox");
247     if (bbox_node != nullptr) {
248       ParseNodeValue(bbox_node, "xmin", &xmin);
249       ParseNodeValue(bbox_node, "xmax", &xmax);
250       ParseNodeValue(bbox_node, "ymin", &ymin);
251       ParseNodeValue(bbox_node, "ymax", &ymax);
252       RETURN_IF_NOT_OK(CheckIfBboxValid(xmin, ymin, xmax, ymax, path));
253     } else {
254       RETURN_STATUS_UNEXPECTED("Invalid xml, the node of bndbox is missing in " + path);
255     }
256 
257     if (label_name != "" && (class_index_.empty() || class_index_.find(label_name) != class_index_.end()) && xmin > 0 &&
258         ymin > 0 && xmax > xmin && ymax > ymin) {
259       std::vector<float> bbox_list = {xmin, ymin, xmax - xmin, ymax - ymin, difficult, truncated};
260       annotation.emplace_back(std::make_pair(label_name, bbox_list));
261       label_index_[label_name] = 0;
262     }
263     object = object->NextSiblingElement("object");
264   }
265   if (annotation.size() > 0) {
266     annotation_map_[path] = annotation;
267   }
268   return Status::OK();
269 }
270 
PrepareData()271 Status VOCOp::PrepareData() {
272   RETURN_IF_NOT_OK(this->ParseImageIds());
273   if (task_type_ == TaskType::Detection) {
274     RETURN_IF_NOT_OK(this->ParseAnnotationIds());
275   }
276   return Status::OK();
277 }
278 
ReadImageToTensor(const std::string & path,const ColDescriptor & col,std::shared_ptr<Tensor> * tensor)279 Status VOCOp::ReadImageToTensor(const std::string &path, const ColDescriptor &col, std::shared_ptr<Tensor> *tensor) {
280 #ifdef ENABLE_PYTHON
281   RETURN_IF_NOT_OK(MappableLeafOp::ImageDecrypt(path, tensor, decrypt_));
282 #else
283   RETURN_IF_NOT_OK(Tensor::CreateFromFile(path, tensor));
284 #endif
285   if (decode_ == true) {
286     Status rc = Decode(*tensor, tensor);
287     if (rc.IsError()) {
288       RETURN_STATUS_UNEXPECTED("Invalid image, failed to decode " + path +
289                                ": the image is damaged or permission denied.");
290     }
291   }
292   return Status::OK();
293 }
294 
295 // When task is Detection, user can get bbox data with four columns:
296 // column ["bbox"] with datatype=float32
297 // column ["label"] with datatype=uint32
298 // column ["difficult"] with datatype=uint32
299 // column ["truncate"] with datatype=uint32
ReadAnnotationToTensor(const std::string & path,TensorRow * row)300 Status VOCOp::ReadAnnotationToTensor(const std::string &path, TensorRow *row) {
301   Annotation annotation = annotation_map_[path];
302   std::shared_ptr<Tensor> bbox, label, difficult, truncate;
303   std::vector<float> bbox_data;
304   std::vector<uint32_t> label_data, difficult_data, truncate_data;
305   dsize_t bbox_num = 0;
306   for (auto item : annotation) {
307     if (label_index_.find(item.first) != label_index_.end()) {
308       if (class_index_.find(item.first) != class_index_.end()) {
309         label_data.push_back(static_cast<uint32_t>(class_index_[item.first]));
310       } else {
311         label_data.push_back(static_cast<uint32_t>(label_index_[item.first]));
312       }
313       CHECK_FAIL_RETURN_UNEXPECTED(
314         item.second.size() == 6,
315         "[Internal ERROR] annotation only support 6 parameters, but got " + std::to_string(item.second.size()));
316 
317       std::vector<float> tmp_bbox = {(item.second)[0], (item.second)[1], (item.second)[2], (item.second)[3]};
318       bbox_data.insert(bbox_data.end(), tmp_bbox.begin(), tmp_bbox.end());
319       difficult_data.push_back(static_cast<uint32_t>((item.second)[4]));
320       truncate_data.push_back(static_cast<uint32_t>((item.second)[5]));
321       bbox_num++;
322     }
323   }
324   RETURN_IF_NOT_OK(Tensor::CreateFromVector(bbox_data, TensorShape({bbox_num, 4}), &bbox));
325   RETURN_IF_NOT_OK(Tensor::CreateFromVector(label_data, TensorShape({bbox_num, 1}), &label));
326   RETURN_IF_NOT_OK(Tensor::CreateFromVector(difficult_data, TensorShape({bbox_num, 1}), &difficult));
327   RETURN_IF_NOT_OK(Tensor::CreateFromVector(truncate_data, TensorShape({bbox_num, 1}), &truncate));
328   (*row) = TensorRow({std::move(bbox), std::move(label), std::move(difficult), std::move(truncate)});
329   return Status::OK();
330 }
331 
CountTotalRows(int64_t * count)332 Status VOCOp::CountTotalRows(int64_t *count) {
333   RETURN_UNEXPECTED_IF_NULL(count);
334   switch (task_type_) {
335     case TaskType::Detection:
336       RETURN_IF_NOT_OK(ParseImageIds());
337       RETURN_IF_NOT_OK(ParseAnnotationIds());
338       break;
339     case TaskType::Segmentation:
340       RETURN_IF_NOT_OK(ParseImageIds());
341       break;
342   }
343   *count = static_cast<int64_t>(image_ids_.size());
344   return Status::OK();
345 }
346 
ComputeColMap()347 Status VOCOp::ComputeColMap() {
348   // Set the column name map (base class field)
349   if (column_name_id_map_.empty()) {
350     for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
351       column_name_id_map_[data_schema_->Column(i).Name()] = i;
352     }
353   } else {
354     MS_LOG(WARNING) << "Column name map is already set!";
355   }
356   return Status::OK();
357 }
358 
GetClassIndexing(std::vector<std::pair<std::string,std::vector<int32_t>>> * output_class_indexing)359 Status VOCOp::GetClassIndexing(std::vector<std::pair<std::string, std::vector<int32_t>>> *output_class_indexing) {
360   RETURN_UNEXPECTED_IF_NULL(output_class_indexing);
361   if ((*output_class_indexing).empty()) {
362     if (task_type_ != TaskType::Detection) {
363       MS_LOG(ERROR) << "Invalid task, only 'Detection' task support GetClassIndexing.";
364       RETURN_STATUS_UNEXPECTED("Invalid task, only 'Detection' task support GetClassIndexing.");
365     }
366     RETURN_IF_NOT_OK(ParseImageIds());
367     RETURN_IF_NOT_OK(ParseAnnotationIds());
368     for (const auto &label : label_index_) {
369       if (!class_index_.empty()) {
370         (*output_class_indexing)
371           .emplace_back(std::make_pair(label.first, std::vector<int32_t>(1, class_index_[label.first])));
372       } else {
373         (*output_class_indexing).emplace_back(std::make_pair(label.first, std::vector<int32_t>(1, label.second)));
374       }
375     }
376   }
377   return Status::OK();
378 }
379 }  // namespace dataset
380 }  // namespace mindspore
381