• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2019-2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_MANIFEST_OP_H_
17 #define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_MANIFEST_OP_H_
18 
19 #include <map>
20 #include <memory>
21 #include <string>
22 #include <utility>
23 #include <vector>
24 
25 #include "minddata/dataset/core/tensor.h"
26 
27 #include "minddata/dataset/engine/data_schema.h"
28 #include "minddata/dataset/engine/datasetops/parallel_op.h"
29 #include "minddata/dataset/engine/datasetops/source/mappable_leaf_op.h"
30 #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
31 #include "minddata/dataset/kernels/image/image_utils.h"
32 #include "minddata/dataset/util/queue.h"
33 #include "minddata/dataset/util/services.h"
34 #include "minddata/dataset/util/status.h"
35 #include "minddata/dataset/util/wait_post.h"
36 
37 namespace mindspore {
38 namespace dataset {
// Dataset op (a MappableLeafOp) that produces image/label tensor rows described by a Manifest file.
39 class ManifestOp : public MappableLeafOp {
40  public:
41   // Constructor
42   // @param int32_t num_works - Num of workers reading images in parallel
43   // @param std::string file - file list of Manifest
44   // @param int32_t queue_size - connector queue size
     // @param bool decode - presumably whether images are decoded when a row is loaded (confirm in the .cc)
     // @param std::map<std::string, int32_t> class_index - map from string (presumably class name) to index
     // @param std::unique_ptr<DataSchema> data_schema - schema object; passed by unique_ptr, so ownership moves in
45   // @param std::shared_ptr<SamplerRT> sampler - sampler tells ManifestOp what to read
     // @param std::string usage - usage string (its meaning is not visible in this header; see the .cc)
46   ManifestOp(int32_t num_works, std::string file, int32_t queue_size, bool decode,
47              const std::map<std::string, int32_t> &class_index, std::unique_ptr<DataSchema> data_schema,
48              std::shared_ptr<SamplerRT> sampler, std::string usage);
49   // Destructor (compiler-generated).
50   ~ManifestOp() = default;
51 
52   // Method derived from RandomAccess Op, enable Sampler to get all ids for each class
53   // @param std::map<int32_t, std::vector<int64_t>> *cls_ids - key label, val all ids for this class
54   // @return Status The status code returned
55   Status GetClassIds(std::map<int32_t, std::vector<int64_t>> *cls_ids) const override;
56 
57   // A print method typically used for debugging
58   // @param out - stream to print to
59   // @param show_all - presumably selects a more detailed printout (confirm in the .cc)
60   void Print(std::ostream &out, bool show_all) const override;
61 
62   /// \brief Counts the total number of rows in Manifest
63   /// \param[out] count Number of rows counted
64   /// \return Status of the function
65   Status CountTotalRows(int64_t *count);
66 
67   // Op name getter
68   // @return Name of the current Op, always the literal "ManifestOp"
Name()69   std::string Name() const override { return "ManifestOp"; }
70 
71   /// \brief Base-class override for GetNumClasses
72   /// \param[out] num_classes the number of classes
73   /// \return Status of the function
74   Status GetNumClasses(int64_t *num_classes) override;
75 
76   /// \brief Gets the class indexing
      /// \param[out] output_class_indexing receives <string, vector<int32_t>> pairs (presumably class name -> indices)
77   /// \return Status - The status code return
78   Status GetClassIndexing(std::vector<std::pair<std::string, std::vector<int32_t>>> *output_class_indexing) override;
79 
80  private:
81   // Load a tensor row for the given row id
82   // @param row_id_type row_id - id for this tensor row
83   // NOTE(review): presumably looks up an <imagefile, <label1, label2...>> pair in image_labelname_ - confirm in the .cc
84   // @param TensorRow row - image & label read into this tensor row
85   // @return Status The status code returned
86   Status LoadTensorRow(row_id_type row_id, TensorRow *row) override;
87 
88   // Parse manifest file to get image path and label and so on.
89   // @return Status The status code returned
90   Status ParseManifestFile();
91 
92   // Base-class override; judging by its name it launches the worker threads and initializes the op (see the .cc)
93   // @return Status The status code returned
94   Status LaunchThreadsAndInitOp() override;
95 
96   // Check if image is valid. Only support JPEG/PNG/GIF/BMP
      // @param file_name - path of the image file to check
      // @param valid - output flag; presumably set to true when the file is a supported image type
97   // @return Status The status code returned
98   Status CheckImageType(const std::string &file_name, bool *valid);
99 
100   // Count label index,num rows and num samples
101   // @return Status The status code returned
102   Status CountDatasetInfo();
103 
104   // Private function for computing the assignment of the column name map.
105   // @return - Status
106   Status ComputeColMap() override;
107 
108   int64_t io_block_pushed_;  // NOTE(review): no in-class initializer; presumably set in the constructor - confirm
109   int64_t sampler_ind_;      // NOTE(review): no in-class initializer; presumably set in the constructor - confirm
110   std::unique_ptr<DataSchema> data_schema_;  // schema object owned by this op
111   std::string file_;  // file that store the information of images
112   std::map<std::string, int32_t> class_index_;  // string -> index map; presumably the constructor's class_index
113   bool decode_;        // presumably the constructor's decode flag
114   std::string usage_;  // presumably the constructor's usage string
115 
116   std::map<std::string, int32_t> label_index_;  // string -> index map; presumably filled by CountDatasetInfo
       // Presumably one <imagefile, <label1, label2...>> entry per image, filled by ParseManifestFile - confirm in the .cc
117   std::vector<std::pair<std::string, std::vector<std::string>>> image_labelname_;
118 };
119 }  // namespace dataset
120 }  // namespace mindspore
121 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_MANIFEST_OP_H_
122