• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2019-2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "minddata/dataset/engine/datasetops/source/voc_op.h"
17 
18 #include <algorithm>
19 #include <fstream>
20 
21 #include "utils/file_utils.h"
22 #include "minddata/dataset/core/config_manager.h"
23 #include "minddata/dataset/core/tensor_shape.h"
24 #include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h"
25 #include "minddata/dataset/engine/db_connector.h"
26 #include "minddata/dataset/engine/execution_tree.h"
27 #include "utils/ms_utils.h"
28 
29 namespace mindspore {
30 namespace dataset {
// Column name literals (none of them is referenced in the visible part of this file).
const char kColumnImage[] = "image";
const char kColumnTarget[] = "target";
const char kColumnBbox[] = "bbox";
const char kColumnLabel[] = "label";
const char kColumnDifficult[] = "difficult";
const char kColumnTruncate[] = "truncate";

// Sub-directories appended to the dataset folder path when building file names.
const char kJPEGImagesFolder[] = "/JPEGImages/";
const char kSegmentationClassFolder[] = "/SegmentationClass/";
const char kAnnotationsFolder[] = "/Annotations/";
const char kImageSetsSegmentation[] = "/ImageSets/Segmentation/";
const char kImageSetsMain[] = "/ImageSets/Main/";

// File extensions appended after an image id (or after the usage name for image-set lists).
const char kImageExtension[] = ".jpg";
const char kSegmentationExtension[] = ".png";
const char kAnnotationExtension[] = ".xml";
const char kImageSetsExtension[] = ".txt";
46 
VOCOp(const TaskType & task_type,const std::string & task_mode,const std::string & folder_path,const std::map<std::string,int32_t> & class_index,int32_t num_workers,int32_t queue_size,bool decode,std::unique_ptr<DataSchema> data_schema,std::shared_ptr<SamplerRT> sampler,bool extra_metadata)47 VOCOp::VOCOp(const TaskType &task_type, const std::string &task_mode, const std::string &folder_path,
48              const std::map<std::string, int32_t> &class_index, int32_t num_workers, int32_t queue_size, bool decode,
49              std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler, bool extra_metadata)
50     : MappableLeafOp(num_workers, queue_size, std::move(sampler)),
51       decode_(decode),
52       row_cnt_(0),
53       task_type_(task_type),
54       usage_(task_mode),
55       folder_path_(folder_path),
56       class_index_(class_index),
57       data_schema_(std::move(data_schema)),
58       extra_metadata_(extra_metadata) {
59   io_block_queues_.Init(num_workers_, queue_size);
60 }
61 
Print(std::ostream & out,bool show_all) const62 void VOCOp::Print(std::ostream &out, bool show_all) const {
63   if (!show_all) {
64     // Call the super class for displaying any common 1-liner info
65     ParallelOp::Print(out, show_all);
66     // Then show any custom derived-internal 1-liner info for this op
67     out << "\n";
68   } else {
69     // Call the super class for displaying any common detailed info
70     ParallelOp::Print(out, show_all);
71     // Then show any custom derived-internal stuff
72     out << "\nNumber of rows: " << num_rows_ << "\nVOC Directory: " << folder_path_
73         << "\nDecode: " << (decode_ ? "yes" : "no") << "\n\n";
74   }
75 }
76 
LoadTensorRow(row_id_type row_id,TensorRow * trow)77 Status VOCOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) {
78   std::string image_id = image_ids_[row_id];
79   std::vector<std::string> path_list;
80   const std::string kImageFile =
81     folder_path_ + std::string(kJPEGImagesFolder) + image_id + std::string(kImageExtension);
82   if (task_type_ == TaskType::Segmentation) {
83     std::shared_ptr<Tensor> image, target;
84     const std::string kTargetFile =
85       folder_path_ + std::string(kSegmentationClassFolder) + image_id + std::string(kSegmentationExtension);
86     RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->Column(0), &image));
87     RETURN_IF_NOT_OK(ReadImageToTensor(kTargetFile, data_schema_->Column(1), &target));
88     (*trow) = TensorRow(row_id, {std::move(image), std::move(target)});
89     path_list = {kImageFile, kTargetFile};
90   } else if (task_type_ == TaskType::Detection) {
91     std::shared_ptr<Tensor> image;
92     TensorRow annotation;
93     const std::string kAnnotationFile =
94       folder_path_ + std::string(kAnnotationsFolder) + image_id + std::string(kAnnotationExtension);
95     RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->Column(0), &image));
96     RETURN_IF_NOT_OK(ReadAnnotationToTensor(kAnnotationFile, &annotation));
97     trow->setId(row_id);
98     trow->push_back(std::move(image));
99     trow->insert(trow->end(), annotation.begin(), annotation.end());
100     path_list = {kImageFile, kAnnotationFile, kAnnotationFile, kAnnotationFile, kAnnotationFile};
101   }
102   if (extra_metadata_) {
103     // Now VOCDataset add a new column named "_meta-filename".
104     std::shared_ptr<Tensor> filename;
105     RETURN_IF_NOT_OK(Tensor::CreateScalar(image_id, &filename));
106     trow->push_back(std::move(filename));
107     path_list.push_back(kImageFile);
108   }
109   trow->setPath(path_list);
110   return Status::OK();
111 }
112 
ParseImageIds()113 Status VOCOp::ParseImageIds() {
114   if (!image_ids_.empty()) return Status::OK();
115   std::string image_sets_file;
116   if (task_type_ == TaskType::Segmentation) {
117     image_sets_file = folder_path_ + std::string(kImageSetsSegmentation) + usage_ + std::string(kImageSetsExtension);
118   } else if (task_type_ == TaskType::Detection) {
119     image_sets_file = folder_path_ + std::string(kImageSetsMain) + usage_ + std::string(kImageSetsExtension);
120   }
121 
122   auto realpath = FileUtils::GetRealPath(image_sets_file.data());
123   if (!realpath.has_value()) {
124     MS_LOG(ERROR) << "Invalid file, get real path failed, path=" << image_sets_file;
125     RETURN_STATUS_UNEXPECTED("Invalid file, get real path failed, path=" + image_sets_file);
126   }
127 
128   std::ifstream in_file;
129   in_file.open(realpath.value());
130   if (in_file.fail()) {
131     RETURN_STATUS_UNEXPECTED("Invalid file, failed to open file: " + image_sets_file);
132   }
133   std::string id;
134   while (getline(in_file, id)) {
135     if (id.size() > 0 && id[id.size() - 1] == '\r') {
136       image_ids_.push_back(id.substr(0, id.size() - 1));
137     } else {
138       image_ids_.push_back(id);
139     }
140   }
141   in_file.close();
142   image_ids_.shrink_to_fit();
143   num_rows_ = image_ids_.size();
144   return Status::OK();
145 }
146 
ParseAnnotationIds()147 Status VOCOp::ParseAnnotationIds() {
148   std::vector<std::string> new_image_ids;
149   for (auto id : image_ids_) {
150     const std::string annotation_name =
151       folder_path_ + std::string(kAnnotationsFolder) + id + std::string(kAnnotationExtension);
152     RETURN_IF_NOT_OK(ParseAnnotationBbox(annotation_name));
153     if (annotation_map_.find(annotation_name) != annotation_map_.end()) {
154       new_image_ids.push_back(id);
155     }
156   }
157 
158   if (image_ids_.size() != new_image_ids.size()) {
159     image_ids_.clear();
160     image_ids_.insert(image_ids_.end(), new_image_ids.begin(), new_image_ids.end());
161   }
162   uint32_t count = 0;
163   for (auto &label : label_index_) {
164     label.second = count++;
165   }
166 
167   num_rows_ = image_ids_.size();
168   if (num_rows_ == 0) {
169     RETURN_STATUS_UNEXPECTED(
170       "Invalid data, VOCDataset API can't read the data file (interface mismatch or no data found). "
171       "Check file in directory:" +
172       folder_path_);
173   }
174   return Status::OK();
175 }
176 
ParseNodeValue(XMLElement * bbox_node,const char * name,float * value)177 void VOCOp::ParseNodeValue(XMLElement *bbox_node, const char *name, float *value) {
178   *value = 0.0;
179   if (bbox_node != nullptr) {
180     XMLElement *node = bbox_node->FirstChildElement(name);
181     if (node != nullptr) {
182       *value = node->FloatText();
183     }
184   }
185 }
186 
CheckIfBboxValid(const float & xmin,const float & ymin,const float & xmax,const float & ymax,const std::string & path)187 Status VOCOp::CheckIfBboxValid(const float &xmin, const float &ymin, const float &xmax, const float &ymax,
188                                const std::string &path) {
189   if (!(xmin > 0 && ymin > 0 && xmax > xmin && ymax > ymin)) {
190     std::string invalid_bbox = "{" + std::to_string(static_cast<int>(xmin)) + ", " +
191                                std::to_string(static_cast<int>(ymin)) + ", " + std::to_string(static_cast<int>(xmax)) +
192                                ", " + std::to_string(static_cast<int>(ymax)) + "}";
193     RETURN_STATUS_UNEXPECTED("Invalid bndbox: " + invalid_bbox + " found in " + path);
194   }
195   return Status::OK();
196 }
197 
ParseAnnotationBbox(const std::string & path)198 Status VOCOp::ParseAnnotationBbox(const std::string &path) {
199   if (!Path(path).Exists()) {
200     RETURN_STATUS_UNEXPECTED("Invalid file, failed to open file: " + path);
201   }
202   Annotation annotation;
203   XMLDocument doc;
204   XMLError e = doc.LoadFile(common::SafeCStr(path));
205   if (e != XMLError::XML_SUCCESS) {
206     RETURN_STATUS_UNEXPECTED("Invalid file, failed to load xml file: " + path);
207   }
208   XMLElement *root = doc.RootElement();
209   if (root == nullptr) {
210     RETURN_STATUS_UNEXPECTED("Invalid data, failed to load root element for xml file.");
211   }
212   XMLElement *object = root->FirstChildElement("object");
213   if (object == nullptr) {
214     RETURN_STATUS_UNEXPECTED("Invalid data, no object found in " + path);
215   }
216   while (object != nullptr) {
217     std::string label_name;
218     float xmin = 0.0, ymin = 0.0, xmax = 0.0, ymax = 0.0, truncated = 0.0, difficult = 0.0;
219     XMLElement *name_node = object->FirstChildElement("name");
220     if (name_node != nullptr && name_node->GetText() != 0) label_name = name_node->GetText();
221     ParseNodeValue(object, "difficult", &difficult);
222     ParseNodeValue(object, "truncated", &truncated);
223 
224     XMLElement *bbox_node = object->FirstChildElement("bndbox");
225     if (bbox_node != nullptr) {
226       ParseNodeValue(bbox_node, "xmin", &xmin);
227       ParseNodeValue(bbox_node, "xmax", &xmax);
228       ParseNodeValue(bbox_node, "ymin", &ymin);
229       ParseNodeValue(bbox_node, "ymax", &ymax);
230       RETURN_IF_NOT_OK(CheckIfBboxValid(xmin, ymin, xmax, ymax, path));
231     } else {
232       RETURN_STATUS_UNEXPECTED("Invalid data, bndbox dismatch in " + path);
233     }
234 
235     if (label_name != "" && (class_index_.empty() || class_index_.find(label_name) != class_index_.end()) && xmin > 0 &&
236         ymin > 0 && xmax > xmin && ymax > ymin) {
237       std::vector<float> bbox_list = {xmin, ymin, xmax - xmin, ymax - ymin, difficult, truncated};
238       annotation.emplace_back(std::make_pair(label_name, bbox_list));
239       label_index_[label_name] = 0;
240     }
241     object = object->NextSiblingElement("object");
242   }
243   if (annotation.size() > 0) {
244     annotation_map_[path] = annotation;
245   }
246   return Status::OK();
247 }
248 
LaunchThreadsAndInitOp()249 Status VOCOp::LaunchThreadsAndInitOp() {
250   if (tree_ == nullptr) {
251     RETURN_STATUS_UNEXPECTED("Pipeline init failed, Execution tree not set.");
252   }
253   RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks()));
254   RETURN_IF_NOT_OK(wait_for_workers_post_.Register(tree_->AllTasks()));
255   RETURN_IF_NOT_OK(
256     tree_->LaunchWorkers(num_workers_, std::bind(&VOCOp::WorkerEntry, this, std::placeholders::_1), "", id()));
257   TaskManager::FindMe()->Post();
258   RETURN_IF_NOT_OK(this->ParseImageIds());
259   if (task_type_ == TaskType::Detection) {
260     RETURN_IF_NOT_OK(this->ParseAnnotationIds());
261   }
262   RETURN_IF_NOT_OK(this->InitSampler());
263   return Status::OK();
264 }
265 
ReadImageToTensor(const std::string & path,const ColDescriptor & col,std::shared_ptr<Tensor> * tensor)266 Status VOCOp::ReadImageToTensor(const std::string &path, const ColDescriptor &col, std::shared_ptr<Tensor> *tensor) {
267   RETURN_IF_NOT_OK(Tensor::CreateFromFile(path, tensor));
268   if (decode_ == true) {
269     Status rc = Decode(*tensor, tensor);
270     if (rc.IsError()) {
271       RETURN_STATUS_UNEXPECTED("Invalid data, failed to decode image: " + path);
272     }
273   }
274   return Status::OK();
275 }
276 
277 // When task is Detection, user can get bbox data with four columns:
278 // column ["bbox"] with datatype=float32
279 // column ["label"] with datatype=uint32
280 // column ["difficult"] with datatype=uint32
281 // column ["truncate"] with datatype=uint32
ReadAnnotationToTensor(const std::string & path,TensorRow * row)282 Status VOCOp::ReadAnnotationToTensor(const std::string &path, TensorRow *row) {
283   Annotation annotation = annotation_map_[path];
284   std::shared_ptr<Tensor> bbox, label, difficult, truncate;
285   std::vector<float> bbox_data;
286   std::vector<uint32_t> label_data, difficult_data, truncate_data;
287   dsize_t bbox_num = 0;
288   for (auto item : annotation) {
289     if (label_index_.find(item.first) != label_index_.end()) {
290       if (class_index_.find(item.first) != class_index_.end()) {
291         label_data.push_back(static_cast<uint32_t>(class_index_[item.first]));
292       } else {
293         label_data.push_back(static_cast<uint32_t>(label_index_[item.first]));
294       }
295       CHECK_FAIL_RETURN_UNEXPECTED(
296         item.second.size() == 6,
297         "Invalid parameter, annotation only support 6 parameters, but got " + std::to_string(item.second.size()));
298 
299       std::vector<float> tmp_bbox = {(item.second)[0], (item.second)[1], (item.second)[2], (item.second)[3]};
300       bbox_data.insert(bbox_data.end(), tmp_bbox.begin(), tmp_bbox.end());
301       difficult_data.push_back(static_cast<uint32_t>((item.second)[4]));
302       truncate_data.push_back(static_cast<uint32_t>((item.second)[5]));
303       bbox_num++;
304     }
305   }
306   RETURN_IF_NOT_OK(Tensor::CreateFromVector(bbox_data, TensorShape({bbox_num, 4}), &bbox));
307   RETURN_IF_NOT_OK(Tensor::CreateFromVector(label_data, TensorShape({bbox_num, 1}), &label));
308   RETURN_IF_NOT_OK(Tensor::CreateFromVector(difficult_data, TensorShape({bbox_num, 1}), &difficult));
309   RETURN_IF_NOT_OK(Tensor::CreateFromVector(truncate_data, TensorShape({bbox_num, 1}), &truncate));
310   (*row) = TensorRow({std::move(bbox), std::move(label), std::move(difficult), std::move(truncate)});
311   return Status::OK();
312 }
313 
CountTotalRows(int64_t * count)314 Status VOCOp::CountTotalRows(int64_t *count) {
315   switch (task_type_) {
316     case TaskType::Detection:
317       RETURN_IF_NOT_OK(ParseImageIds());
318       RETURN_IF_NOT_OK(ParseAnnotationIds());
319       break;
320     case TaskType::Segmentation:
321       RETURN_IF_NOT_OK(ParseImageIds());
322       break;
323   }
324   *count = static_cast<int64_t>(image_ids_.size());
325   return Status::OK();
326 }
327 
ComputeColMap()328 Status VOCOp::ComputeColMap() {
329   // Set the column name map (base class field)
330   if (column_name_id_map_.empty()) {
331     for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
332       column_name_id_map_[data_schema_->Column(i).Name()] = i;
333     }
334   } else {
335     MS_LOG(WARNING) << "Column name map is already set!";
336   }
337   return Status::OK();
338 }
339 
GetClassIndexing(std::vector<std::pair<std::string,std::vector<int32_t>>> * output_class_indexing)340 Status VOCOp::GetClassIndexing(std::vector<std::pair<std::string, std::vector<int32_t>>> *output_class_indexing) {
341   if ((*output_class_indexing).empty()) {
342     if (task_type_ != TaskType::Detection) {
343       MS_LOG(ERROR) << "Invalid parameter, GetClassIndexing only valid in \"Detection\" task.";
344       RETURN_STATUS_UNEXPECTED("Invalid parameter, GetClassIndexing only valid in \"Detection\" task.");
345     }
346     RETURN_IF_NOT_OK(ParseImageIds());
347     RETURN_IF_NOT_OK(ParseAnnotationIds());
348     for (const auto &label : label_index_) {
349       if (!class_index_.empty()) {
350         (*output_class_indexing)
351           .emplace_back(std::make_pair(label.first, std::vector<int32_t>(1, class_index_[label.first])));
352       } else {
353         (*output_class_indexing).emplace_back(std::make_pair(label.first, std::vector<int32_t>(1, label.second)));
354       }
355     }
356   }
357   return Status::OK();
358 }
359 }  // namespace dataset
360 }  // namespace mindspore
361