1 /** 2 * Copyright 2019-2021 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_MANIFEST_OP_H_ 17 #define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_MANIFEST_OP_H_ 18 19 #include <map> 20 #include <memory> 21 #include <string> 22 #include <utility> 23 #include <vector> 24 25 #include "minddata/dataset/core/tensor.h" 26 27 #include "minddata/dataset/engine/data_schema.h" 28 #include "minddata/dataset/engine/datasetops/parallel_op.h" 29 #include "minddata/dataset/engine/datasetops/source/mappable_leaf_op.h" 30 #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" 31 #include "minddata/dataset/kernels/image/image_utils.h" 32 #include "minddata/dataset/util/queue.h" 33 #include "minddata/dataset/util/services.h" 34 #include "minddata/dataset/util/status.h" 35 #include "minddata/dataset/util/wait_post.h" 36 37 namespace mindspore { 38 namespace dataset { 39 class ManifestOp : public MappableLeafOp { 40 public: 41 // Constructor 42 // @param int32_t num_works - Num of workers reading images in parallel 43 // @param std::string - file list of Manifest 44 // @param int32_t queue_size - connector queue size 45 // @param td::unique_ptr<Sampler> sampler - sampler tells ImageFolderOp what to read 46 ManifestOp(int32_t num_works, std::string file, int32_t queue_size, bool decode, 47 const std::map<std::string, int32_t> &class_index, std::unique_ptr<DataSchema> data_schema, 48 std::shared_ptr<SamplerRT> sampler, std::string usage); 49 // Destructor. 50 ~ManifestOp() = default; 51 52 // Method derived from RandomAccess Op, enable Sampler to get all ids for each class 53 // @param (std::map<int64_t, std::vector<int64_t >> * map - key label, val all ids for this class 54 // @return Status The status code returned 55 Status GetClassIds(std::map<int32_t, std::vector<int64_t>> *cls_ids) const override; 56 57 // A print method typically used for debugging 58 // @param out 59 // @param show_all 60 void Print(std::ostream &out, bool show_all) const override; 61 62 /// \brief Counts the total number of rows in Manifest 63 /// \param[out] count Number of rows counted 64 /// \return Status of the function 65 Status CountTotalRows(int64_t *count); 66 67 // Op name getter 68 // @return Name of the current Op Name()69 std::string Name() const override { return "ManifestOp"; } 70 71 /// \brief Base-class override for GetNumClasses 72 /// \param[out] num_classes the number of classes 73 /// \return Status of the function 74 Status GetNumClasses(int64_t *num_classes) override; 75 76 /// \brief Gets the class indexing 77 /// \return Status - The status code return 78 Status GetClassIndexing(std::vector<std::pair<std::string, std::vector<int32_t>>> *output_class_indexing) override; 79 80 private: 81 // Load a tensor row according to a pair 82 // @param row_id_type row_id - id for this tensor row 83 // @param std::pair<std::string, std::vector<std::string>> - <imagefile, <label1, label2...>> 84 // @param TensorRow row - image & label read into this tensor row 85 // @return Status The status code returned 86 Status LoadTensorRow(row_id_type row_id, TensorRow *row) override; 87 88 // Parse manifest file to get image path and label and so on. 89 // @return Status The status code returned 90 Status ParseManifestFile(); 91 92 // Called first when function is called 93 // @return Status The status code returned 94 Status LaunchThreadsAndInitOp() override; 95 96 // Check if image ia valid.Only support JPEG/PNG/GIF/BMP 97 // @return 98 Status CheckImageType(const std::string &file_name, bool *valid); 99 100 // Count label index,num rows and num samples 101 // @return Status The status code returned 102 Status CountDatasetInfo(); 103 104 // Private function for computing the assignment of the column name map. 105 // @return - Status 106 Status ComputeColMap() override; 107 108 int64_t io_block_pushed_; 109 int64_t sampler_ind_; 110 std::unique_ptr<DataSchema> data_schema_; 111 std::string file_; // file that store the information of images 112 std::map<std::string, int32_t> class_index_; 113 bool decode_; 114 std::string usage_; 115 116 std::map<std::string, int32_t> label_index_; 117 std::vector<std::pair<std::string, std::vector<std::string>>> image_labelname_; 118 }; 119 } // namespace dataset 120 } // namespace mindspore 121 #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_MANIFEST_OP_H_ 122