1 /** 2 * Copyright 2020-2021 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_ALBUM_OP_H_ 17 #define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_ALBUM_OP_H_ 18 19 #include <algorithm> 20 #include <deque> 21 #include <map> 22 #include <memory> 23 #include <queue> 24 #include <set> 25 #include <string> 26 #include <unordered_map> 27 #include <utility> 28 #include <vector> 29 #include "minddata/dataset/core/tensor.h" 30 31 #include "minddata/dataset/engine/data_schema.h" 32 #include "minddata/dataset/engine/datasetops/parallel_op.h" 33 #include "minddata/dataset/engine/datasetops/source/mappable_leaf_op.h" 34 #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" 35 #include "minddata/dataset/util/path.h" 36 #include "minddata/dataset/util/queue.h" 37 #include "minddata/dataset/util/services.h" 38 #include "minddata/dataset/util/status.h" 39 #include "minddata/dataset/util/wait_post.h" 40 41 namespace mindspore { 42 namespace dataset { 43 // Forward declares 44 template <typename T> 45 class Queue; 46 47 // Define row information as a list of file objects to read 48 using FolderImages = std::shared_ptr<std::pair<std::string, std::queue<std::string>>>; 49 50 /// \class AlbumOp album_op.h 51 class AlbumOp : public MappableLeafOp { 52 public: 53 /// \brief Constructor 54 /// \param[in] num_wkrs - Num of workers reading images in parallel 55 /// \param[in] file_dir - directory of Album 56 /// \param[in] queue_size - connector size 57 /// \param[in] do_decode - decode image files 58 /// \param[in] exts - set of file extensions to read, if empty, read everything under the dir 59 /// \param[in] data_schema - schema of dataset 60 /// \param[in] sampler - sampler tells AlbumOp what to read 61 AlbumOp(int32_t num_wkrs, std::string file_dir, int32_t queue_size, bool do_decode, const std::set<std::string> &exts, 62 std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler); 63 64 /// \brief Destructor. 65 ~AlbumOp() = default; 66 67 /// \brief Initialize AlbumOp related var, calls the function to walk all files 68 /// \return Status The status code returned 69 Status PrescanEntry(); 70 71 /// \brief A print method typically used for debugging 72 /// \param[in] out 73 /// \param[in] show_all 74 void Print(std::ostream &out, bool show_all) const override; 75 76 /// \brief Check if image ia valid.Only support JPEG/PNG/GIF/BMP 77 /// This function could be optimized to return the tensor to reduce open/closing files 78 /// \return bool - if file is bad then return false 79 bool CheckImageType(const std::string &file_name, bool *valid); 80 81 // Op name getter 82 // @return Name of the current Op Name()83 std::string Name() const override { return "AlbumOp"; } 84 85 private: 86 /// \brief Load image to tensor row 87 /// \param[in] image_file Image name of file 88 /// \param[in] col_num Column num in schema 89 /// \param[in, out] row Tensor row to push to 90 /// \return Status The status code returned 91 Status LoadImageTensor(const std::string &image_file, int32_t col_num, TensorRow *row); 92 93 /// \brief Load vector of ints to tensor, append tensor to tensor row 94 /// \param[in] json_obj Json object containing multi-dimensional label 95 /// \param[in] col_num Column num in schema 96 /// \param[in, out] row Tensor row to push to 97 /// \return Status The status code returned 98 Status LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row); 99 100 /// \brief Load vector of floatss to tensor, append tensor to tensor row 101 /// \param[in] json_obj Json object containing array data 102 /// \param[in] col_num Column num in schema 103 /// \param[in, out] row Tensor row to push to 104 /// \return Status The status code returned 105 Status LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row); 106 107 /// \brief Load string array into a tensor, append tensor to tensor row 108 /// \param[in] json_obj Json object containing string tensor 109 /// \param[in] col_num Column num in schema 110 /// \param[in, out] row Tensor row to push to 111 /// \return Status The status code returned 112 Status LoadStringArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row); 113 114 /// \brief Load string into a tensor, append tensor to tensor row 115 /// \param[in] json_obj Json object containing string tensor 116 /// \param[in] col_num Column num in schema 117 /// \param[in, out] row Tensor row to push to 118 /// \return Status The status code returned 119 Status LoadStringTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row); 120 121 /// \brief Load float value to tensor row 122 /// \param[in] json_obj Json object containing float 123 /// \param[in] col_num Column num in schema 124 /// \param[in, out] row Tensor row to push to 125 /// \return Status The status code returned 126 Status LoadFloatTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row); 127 128 /// \brief Load int value to tensor row 129 /// \param[in] json_obj Json object containing int 130 /// \param[in] col_num Column num in schema 131 /// \param[in, out] row Tensor row to push to 132 /// \return Status The status code returned 133 Status LoadIntTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row); 134 135 /// \brief Load empty tensor to tensor row 136 /// \param[in] col_num Column num in schema 137 /// \param[in, out] row Tensor row to push to 138 /// \return Status The status code returned 139 Status LoadEmptyTensor(int32_t col_num, TensorRow *row); 140 141 /// \brief Load id from file name to tensor row 142 /// \param[in] file The file name to get ID from 143 /// \param[in] col_num Column num in schema 144 /// \param[in, out] row Tensor row to push to 145 /// \return Status The status code returned 146 Status LoadIDTensor(const std::string &file, int32_t col_num, TensorRow *row); 147 148 /// \brief Load a tensor row according to a json file 149 /// \param[in] row_id_type row_id - id for this tensor row 150 /// \param[in, out] TensorRow row Json content stored into a tensor row 151 /// \return Status The status code returned 152 Status LoadTensorRow(row_id_type row_id, TensorRow *row) override; 153 154 /// \brief Load a tensor column according to a json file 155 /// \param[in] ImageColumns file Json file location 156 /// \param[in] index - certain column index 157 /// \param[in] js - json object 158 /// \param[in, out] TensorRow row Json content stored into a tensor row 159 /// \return Status The status code returned 160 Status loadColumnData(const std::string &file, int32_t index, nlohmann::json js, TensorRow *row); 161 162 /// \brief Called first when function is called 163 /// \return Status The status code returned 164 Status LaunchThreadsAndInitOp() override; 165 166 /// \brief Gets the next row 167 /// \param row[out] - Fetched TensorRow 168 /// \return Status The status code returned 169 Status GetNextRowPullMode(TensorRow *const row) override; 170 171 /// Private function for computing the assignment of the column name map. 172 /// \return Status The status code returned 173 Status ComputeColMap() override; 174 175 std::string folder_path_; // directory of image folder 176 bool decode_; 177 std::set<std::string> extensions_; // extensions allowed 178 std::unordered_map<std::string, int32_t> col_name_map_; 179 std::unique_ptr<DataSchema> data_schema_; 180 int64_t sampler_ind_; 181 int64_t dirname_offset_; 182 std::vector<std::string> image_rows_; 183 TensorPtr sample_ids_; 184 185 uint32_t curr_row_; 186 }; 187 } // namespace dataset 188 } // namespace mindspore 189 #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_ALBUM_OP_H_ 190