• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020-2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_ALBUM_OP_H_
17 #define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_ALBUM_OP_H_
18 
19 #include <algorithm>
20 #include <deque>
21 #include <map>
22 #include <memory>
23 #include <queue>
24 #include <set>
25 #include <string>
26 #include <unordered_map>
27 #include <utility>
28 #include <vector>
29 #include "minddata/dataset/core/tensor.h"
30 
31 #include "minddata/dataset/engine/data_schema.h"
32 #include "minddata/dataset/engine/datasetops/parallel_op.h"
33 #include "minddata/dataset/engine/datasetops/source/mappable_leaf_op.h"
34 #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
35 #include "minddata/dataset/util/path.h"
36 #include "minddata/dataset/util/queue.h"
37 #include "minddata/dataset/util/services.h"
38 #include "minddata/dataset/util/status.h"
39 #include "minddata/dataset/util/wait_post.h"
40 
41 namespace mindspore {
42 namespace dataset {
43 // Forward declaration of the Queue class template
44 template <typename T>
45 class Queue;
46 
47 // Shared pointer to a (string, queue of strings) pair; presumably a folder name plus the files to read from it - confirm
48 using FolderImages = std::shared_ptr<std::pair<std::string, std::queue<std::string>>>;
49 
50 /// \class AlbumOp album_op.h
51 class AlbumOp : public MappableLeafOp {
52  public:
53   /// \brief Constructor
54   /// \param[in] num_wkrs - Number of workers reading images in parallel
55   /// \param[in] file_dir - Directory of the Album dataset
56   /// \param[in] queue_size - Connector size
57   /// \param[in] do_decode - Whether to decode image files
58   /// \param[in] exts - Set of file extensions to read; if empty, read everything under the directory
59   /// \param[in] data_schema - Schema of the dataset (ownership is transferred to the op)
60   /// \param[in] sampler - Sampler that tells AlbumOp what to read
61   AlbumOp(int32_t num_wkrs, std::string file_dir, int32_t queue_size, bool do_decode, const std::set<std::string> &exts,
62           std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler);
63 
64   /// \brief Destructor (compiler-generated).
65   ~AlbumOp() = default;
66 
67   /// \brief Initialize AlbumOp related variables; calls the function that walks all files
68   /// \return Status The status code returned
69   Status PrescanEntry();
70 
71   /// \brief A print method typically used for debugging
72   /// \param[in] out - Output stream to print to
73   /// \param[in] show_all - Boolean flag; presumably selects a more detailed printout (confirm in the implementation)
74   void Print(std::ostream &out, bool show_all) const override;
75 
76   /// \brief Check if an image is valid. Only JPEG/PNG/GIF/BMP are supported.
77   ///     This function could be optimized to return the tensor to reduce open/closing files
78   /// \return bool - false if the file is bad. NOTE(review): the role of the `valid` out-parameter is not visible here - confirm
79   bool CheckImageType(const std::string &file_name, bool *valid);
80 
81   // Op name getter
82   // @return Name of the current Op (the literal "AlbumOp")
Name()83   std::string Name() const override { return "AlbumOp"; }
84 
85  private:
86   /// \brief Load image to tensor row
87   /// \param[in] image_file Image name of file
88   /// \param[in] col_num Column num in schema
89   /// \param[in, out] row Tensor row to push to
90   /// \return Status The status code returned
91   Status LoadImageTensor(const std::string &image_file, int32_t col_num, TensorRow *row);
92 
93   /// \brief Load vector of ints to tensor, append tensor to tensor row
94   /// \param[in] json_obj Json object containing multi-dimensional label
95   /// \param[in] col_num Column num in schema
96   /// \param[in, out] row Tensor row to push to
97   /// \return Status The status code returned
98   Status LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row);
99 
100   /// \brief Load vector of floats to tensor, append tensor to tensor row
101   /// \param[in] json_obj Json object containing array data
102   /// \param[in] col_num Column num in schema
103   /// \param[in, out] row Tensor row to push to
104   /// \return Status The status code returned
105   Status LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row);
106 
107   /// \brief Load string array into a tensor, append tensor to tensor row
108   /// \param[in] json_obj Json object containing string tensor
109   /// \param[in] col_num Column num in schema
110   /// \param[in, out] row Tensor row to push to
111   /// \return Status The status code returned
112   Status LoadStringArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row);
113 
114   /// \brief Load string into a tensor, append tensor to tensor row
115   /// \param[in] json_obj Json object containing string tensor
116   /// \param[in] col_num Column num in schema
117   /// \param[in, out] row Tensor row to push to
118   /// \return Status The status code returned
119   Status LoadStringTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row);
120 
121   /// \brief Load float value to tensor row
122   /// \param[in] json_obj Json object containing float
123   /// \param[in] col_num Column num in schema
124   /// \param[in, out] row Tensor row to push to
125   /// \return Status The status code returned
126   Status LoadFloatTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row);
127 
128   /// \brief Load int value to tensor row
129   /// \param[in] json_obj Json object containing int
130   /// \param[in] col_num Column num in schema
131   /// \param[in, out] row Tensor row to push to
132   /// \return Status The status code returned
133   Status LoadIntTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row);
134 
135   /// \brief Load empty tensor to tensor row
136   /// \param[in] col_num Column num in schema
137   /// \param[in, out] row Tensor row to push to
138   /// \return Status The status code returned
139   Status LoadEmptyTensor(int32_t col_num, TensorRow *row);
140 
141   /// \brief Load id from file name to tensor row
142   /// \param[in] file The file name to get ID from
143   /// \param[in] col_num Column num in schema
144   /// \param[in, out] row Tensor row to push to
145   /// \return Status The status code returned
146   Status LoadIDTensor(const std::string &file, int32_t col_num, TensorRow *row);
147 
148   /// \brief Load a tensor row according to a json file
149   /// \param[in] row_id - id for this tensor row
150   /// \param[in, out] row Json content stored into a tensor row
151   /// \return Status The status code returned
152   Status LoadTensorRow(row_id_type row_id, TensorRow *row) override;
153 
154   /// \brief Load a tensor column according to a json file
155   /// \param[in] file Json file location
156   /// \param[in] index - certain column index
157   /// \param[in] js - json object (taken by value, so a copy is made at each call)
158   /// \param[in, out] row Json content stored into a tensor row
159   /// \return Status The status code returned
160   Status loadColumnData(const std::string &file, int32_t index, nlohmann::json js, TensorRow *row);
161 
162   /// \brief Called first when function is called
163   /// \return Status The status code returned
164   Status LaunchThreadsAndInitOp() override;
165 
166   /// \brief Gets the next row
167   /// \param[out] row - Fetched TensorRow
168   /// \return Status The status code returned
169   Status GetNextRowPullMode(TensorRow *const row) override;
170 
171   /// Private function for computing the assignment of the column name map.
172   /// \return Status The status code returned
173   Status ComputeColMap() override;
174 
175   std::string folder_path_;  // directory of image folder
176   bool decode_;  // NOTE(review): presumably mirrors the constructor's do_decode flag - confirm
177   std::set<std::string> extensions_;  // extensions allowed
178   std::unordered_map<std::string, int32_t> col_name_map_;  // string -> int32_t map; presumably column name to column index - confirm
179   std::unique_ptr<DataSchema> data_schema_;  // schema object, exclusively owned by this op
180   int64_t sampler_ind_;
181   int64_t dirname_offset_;
182   std::vector<std::string> image_rows_;  // list of strings; presumably one file name per row - confirm
183   TensorPtr sample_ids_;
184 
185   uint32_t curr_row_;
186 };
187 }  // namespace dataset
188 }  // namespace mindspore
189 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_ALBUM_OP_H_
190