/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_SERDES_H_
17 #define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_SERDES_H_
18 
19 #include <algorithm>
20 #include <cstring>
21 #include <iostream>
22 #include <map>
23 #include <memory>
24 #include <set>
25 #include <string>
26 #include <vector>
27 #include <unordered_set>
28 #include <utility>
29 #include <nlohmann/json.hpp>
30 
31 #include "minddata/dataset/core/tensor.h"
32 
33 #include "minddata/dataset/engine/ir/datasetops/batch_node.h"
34 #include "minddata/dataset/engine/ir/datasetops/concat_node.h"
35 #include "minddata/dataset/engine/ir/datasetops/dataset_node.h"
36 #include "minddata/dataset/engine/ir/datasetops/map_node.h"
37 #include "minddata/dataset/engine/ir/datasetops/project_node.h"
38 #include "minddata/dataset/engine/ir/datasetops/rename_node.h"
39 #include "minddata/dataset/engine/ir/datasetops/repeat_node.h"
40 #include "minddata/dataset/engine/ir/datasetops/shuffle_node.h"
41 #include "minddata/dataset/engine/ir/datasetops/skip_node.h"
42 #include "minddata/dataset/engine/ir/datasetops/data_queue_node.h"
43 #include "minddata/dataset/engine/ir/datasetops/take_node.h"
44 #include "minddata/dataset/engine/ir/datasetops/zip_node.h"
45 
46 #include "minddata/dataset/engine/ir/datasetops/source/album_node.h"
47 #include "minddata/dataset/engine/ir/datasetops/source/celeba_node.h"
48 #include "minddata/dataset/engine/ir/datasetops/source/cifar10_node.h"
49 #include "minddata/dataset/engine/ir/datasetops/source/cifar100_node.h"
50 #include "minddata/dataset/engine/ir/datasetops/source/clue_node.h"
51 #include "minddata/dataset/engine/ir/datasetops/source/coco_node.h"
52 #include "minddata/dataset/engine/ir/datasetops/source/csv_node.h"
53 #include "minddata/dataset/engine/ir/datasetops/source/flickr_node.h"
54 #include "minddata/dataset/engine/ir/datasetops/source/image_folder_node.h"
55 #include "minddata/dataset/engine/ir/datasetops/source/manifest_node.h"
56 #include "minddata/dataset/engine/ir/datasetops/source/mnist_node.h"
57 #include "minddata/dataset/engine/ir/datasetops/source/text_file_node.h"
58 #include "minddata/dataset/engine/ir/datasetops/source/tf_record_node.h"
59 #include "minddata/dataset/engine/ir/datasetops/source/voc_node.h"
60 
61 #include "minddata/dataset/engine/ir/datasetops/source/samplers/distributed_sampler_ir.h"
62 #include "minddata/dataset/engine/ir/datasetops/source/samplers/pk_sampler_ir.h"
63 #include "minddata/dataset/engine/ir/datasetops/source/samplers/prebuilt_sampler_ir.h"
64 #include "minddata/dataset/engine/ir/datasetops/source/samplers/random_sampler_ir.h"
65 #include "minddata/dataset/engine/ir/datasetops/source/samplers/samplers_ir.h"
66 #include "minddata/dataset/engine/ir/datasetops/source/samplers/sequential_sampler_ir.h"
67 #include "minddata/dataset/engine/ir/datasetops/source/samplers/skip_first_epoch_sampler_ir.h"
68 #include "minddata/dataset/engine/ir/datasetops/source/samplers/subset_random_sampler_ir.h"
69 #include "minddata/dataset/engine/ir/datasetops/source/samplers/subset_sampler_ir.h"
70 #include "minddata/dataset/engine/ir/datasetops/source/samplers/weighted_random_sampler_ir.h"
71 
72 #include "minddata/dataset/include/dataset/constants.h"
73 #include "minddata/dataset/include/dataset/datasets.h"
74 #include "minddata/dataset/include/dataset/execute.h"
75 #include "minddata/dataset/include/dataset/iterator.h"
76 #include "minddata/dataset/include/dataset/samplers.h"
77 #include "minddata/dataset/include/dataset/transforms.h"
78 #include "minddata/dataset/include/dataset/vision.h"
79 
80 #include "minddata/dataset/kernels/py_func_op.h"
81 #include "minddata/dataset/kernels/ir/data/transforms_ir.h"
82 #include "minddata/dataset/kernels/ir/vision/adjust_gamma_ir.h"
83 #include "minddata/dataset/kernels/ir/vision/affine_ir.h"
84 #include "minddata/dataset/kernels/ir/vision/ascend_vision_ir.h"
85 #include "minddata/dataset/kernels/ir/vision/auto_contrast_ir.h"
86 #include "minddata/dataset/kernels/ir/vision/bounding_box_augment_ir.h"
87 #include "minddata/dataset/kernels/ir/vision/center_crop_ir.h"
88 #include "minddata/dataset/kernels/ir/vision/crop_ir.h"
89 #include "minddata/dataset/kernels/ir/vision/cutmix_batch_ir.h"
90 #include "minddata/dataset/kernels/ir/vision/cutout_ir.h"
91 #include "minddata/dataset/kernels/ir/vision/decode_ir.h"
92 #include "minddata/dataset/kernels/ir/vision/equalize_ir.h"
93 #include "minddata/dataset/kernels/ir/vision/gaussian_blur_ir.h"
94 #include "minddata/dataset/kernels/ir/vision/horizontal_flip_ir.h"
95 #include "minddata/dataset/kernels/ir/vision/hwc_to_chw_ir.h"
96 #include "minddata/dataset/kernels/ir/vision/invert_ir.h"
97 #include "minddata/dataset/kernels/ir/vision/mixup_batch_ir.h"
98 #include "minddata/dataset/kernels/ir/vision/normalize_ir.h"
99 #include "minddata/dataset/kernels/ir/vision/normalize_pad_ir.h"
100 #include "minddata/dataset/kernels/ir/vision/pad_ir.h"
101 #include "minddata/dataset/kernels/ir/vision/random_affine_ir.h"
102 #include "minddata/dataset/kernels/ir/vision/random_color_adjust_ir.h"
103 #include "minddata/dataset/kernels/ir/vision/random_color_ir.h"
104 #include "minddata/dataset/kernels/ir/vision/random_crop_decode_resize_ir.h"
105 #include "minddata/dataset/kernels/ir/vision/random_crop_ir.h"
106 #include "minddata/dataset/kernels/ir/vision/random_crop_with_bbox_ir.h"
107 #include "minddata/dataset/kernels/ir/vision/random_horizontal_flip_ir.h"
108 #include "minddata/dataset/kernels/ir/vision/random_horizontal_flip_with_bbox_ir.h"
109 #include "minddata/dataset/kernels/ir/vision/random_posterize_ir.h"
110 #include "minddata/dataset/kernels/ir/vision/random_resized_crop_ir.h"
111 #include "minddata/dataset/kernels/ir/vision/random_resized_crop_with_bbox_ir.h"
112 #include "minddata/dataset/kernels/ir/vision/random_resize_ir.h"
113 #include "minddata/dataset/kernels/ir/vision/random_resize_with_bbox_ir.h"
114 #include "minddata/dataset/kernels/ir/vision/random_rotation_ir.h"
115 #include "minddata/dataset/kernels/ir/vision/random_select_subpolicy_ir.h"
116 #include "minddata/dataset/kernels/ir/vision/random_sharpness_ir.h"
117 #include "minddata/dataset/kernels/ir/vision/random_solarize_ir.h"
118 #include "minddata/dataset/kernels/ir/vision/random_vertical_flip_ir.h"
119 #include "minddata/dataset/kernels/ir/vision/random_vertical_flip_with_bbox_ir.h"
120 #include "minddata/dataset/kernels/ir/vision/rescale_ir.h"
121 #include "minddata/dataset/kernels/ir/vision/resize_ir.h"
122 #include "minddata/dataset/kernels/ir/vision/resize_preserve_ar_ir.h"
123 #include "minddata/dataset/kernels/ir/vision/resize_with_bbox_ir.h"
124 #include "minddata/dataset/kernels/ir/vision/rgba_to_bgr_ir.h"
125 #include "minddata/dataset/kernels/ir/vision/rgba_to_rgb_ir.h"
126 #include "minddata/dataset/kernels/ir/vision/rgb_to_bgr_ir.h"
127 #include "minddata/dataset/kernels/ir/vision/rgb_to_gray_ir.h"
128 #include "minddata/dataset/kernels/ir/vision/rotate_ir.h"
129 #include "minddata/dataset/kernels/ir/vision/slice_patches_ir.h"
130 #include "minddata/dataset/kernels/ir/vision/swap_red_blue_ir.h"
131 #include "minddata/dataset/kernels/ir/vision/to_tensor_ir.h"
132 #include "minddata/dataset/kernels/ir/vision/uniform_aug_ir.h"
133 #include "minddata/dataset/kernels/ir/vision/vertical_flip_ir.h"
134 #include "minddata/dataset/text/ir/kernels/text_ir.h"
135 #include "minddata/dataset/util/status.h"
136 
137 namespace mindspore {
138 namespace dataset {
139 /// \brief The Serdes class is used to serialize an IR tree into JSON string and dump into file if file name
140 /// specified.
141 class Serdes {
142  public:
143   /// \brief Constructor
Serdes()144   Serdes() {}
145 
146   /// \brief default destructor
147   ~Serdes() = default;
148 
149   /// \brief function to serialize IR tree into JSON string and/or JSON file
150   /// \param[in] node IR node to be transferred
151   /// \param[in] filename The file name. If specified, save the generated JSON string into the file
152   /// \param[out] out_json The result json string
153   /// \return Status The status code returned
154   static Status SaveToJSON(std::shared_ptr<DatasetNode> node, const std::string &filename, nlohmann::json *out_json);
155 
156   /// \brief Function to update the parameters [num_parallel_workers, connector_queue_size] in the serialized JSON
157   /// object of the optimized IR tree
158   /// \param[in, out] serialized_json The optimized ir tree json node
159   /// \param[in] op_map An ID to DatasetOp mapping
160   static Status UpdateOptimizedIRTreeJSON(nlohmann::json *serialized_json,
161                                           const std::map<int32_t, std::shared_ptr<DatasetOp>> &op_map);
162 
163   /// \brief function to de-serialize JSON file to IR tree
164   /// \param[in] json_filepath input path of json file
165   /// \param[out] ds The deserialized dataset
166   /// \return Status The status code returned
167   static Status Deserialize(const std::string &json_filepath, std::shared_ptr<DatasetNode> *ds);
168 
169   /// \brief Helper function to construct IR tree, separate zip and other operations
170   /// \param[in] json_obj The JSON object to be deserialized
171   /// \param[out] ds Shared pointer of a DatasetNode object containing the deserialized IR tree
172   /// \return Status The status code returned
173   static Status ConstructPipeline(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds);
174 
175   /// \brief Helper functions for creating sampler, separate different samplers and call the related function
176   /// \param[in] json_obj The JSON object to be deserialized
177   /// \param[out] sampler Deserialized sampler
178   /// \return Status The status code returned
179   static Status ConstructSampler(nlohmann::json json_obj, std::shared_ptr<SamplerObj> *sampler);
180 
181   /// \brief helper function to construct tensor operations
182   /// \param[in] json_obj json object of operations to be deserilized
183   /// \param[out] vector of tensor operation pointer
184   /// \return Status The status code returned
185   static Status ConstructTensorOps(nlohmann::json json_obj, std::vector<std::shared_ptr<TensorOperation>> *result);
186 
187   /// \brief helper function to load tensor operations from dataset JSON and construct Execute object.
188   /// \param[in] map_json_string JSON string of dataset.
189   /// \param[out] data_graph Execute object contains tensor operations of map.
190   /// \return Status The status code returned.
191   static Status ParseMindIRPreprocess(const std::vector<std::string> &map_json_string,
192                                       std::vector<std::shared_ptr<mindspore::dataset::Execute>> *data_graph);
193 
194   /// \brief Helper function to save JSON to a file
195   /// \param[in] json_string The JSON string to be saved to the file
196   /// \param[in] file_name The file name
197   /// \param[in] pretty Flag to control pretty printing of JSON string to the file
198   /// \return Status The status code returned
199   static Status SaveJSONToFile(const nlohmann::json &json_string, const std::string &file_name, bool pretty = false);
200 
201  protected:
202   /// \brief Function to determine type of the node - dataset node if no dataset exists or operation node
203   /// \param[in] child_ds children datasets that is already created
204   /// \param[in] json_obj json object to read out type of the node
205   /// \param[out] ds Shared pointer of a DatasetNode object containing the deserialized IR tree
206   /// \return create new node based on the input dataset and type of the operation
207   static Status CreateNode(const std::shared_ptr<DatasetNode> &child_ds, nlohmann::json json_obj,
208                            std::shared_ptr<DatasetNode> *ds);
209 
210   /// \brief Helper functions for creating dataset nodes, separate different datasets and call the related function
211   /// \param[in] json_obj The JSON object to be deserialized
212   /// \param[in] op_type type of dataset
213   /// \param[out] ds Shared pointer of a DatasetNode object containing the deserialized IR tree
214   /// \return Status The status code returned
215   static Status CreateDatasetNode(const nlohmann::json &json_obj, const std::string &op_type,
216                                   std::shared_ptr<DatasetNode> *ds);
217 
218   /// \brief Helper functions for creating operation nodes, separate different operations and call the related function
219   /// \param[in] json_obj The JSON object to be deserialized
220   /// \param[in] op_type type of dataset
221   /// \param[out] result Shared pointer of a DatasetNode object containing the deserialized IR tree
222   /// \return Status The status code returned
223   static Status CreateDatasetOperationNode(const std::shared_ptr<DatasetNode> &ds, const nlohmann::json &json_obj,
224                                            const std::string &op_type, std::shared_ptr<DatasetNode> *result);
225 
226   /// \brief Helper function to map the function pointers
227   /// \return map of key to function pointer
228   static std::map<std::string, Status (*)(nlohmann::json json_obj, std::shared_ptr<TensorOperation> *operation)>
229   InitializeFuncPtr();
230 
231   /// \brief Helper function to perform recursive DFS on the optimized IR tree and to match each IR node with its
232   /// corresponding dataset op
233   /// \param [in, out] serialized_json The optimized ir tree json node
234   /// \param [in, out] op_id The id in execution tree from where to continue the IR Node - DatasetOp matching search
235   /// \param [in] op_map An ID to DatasetOp mapping
236   static Status RecurseUpdateOptimizedIRTreeJSON(nlohmann::json *serialized_json, int32_t *op_id,
237                                                  const std::map<int32_t, std::shared_ptr<DatasetOp>> &op_map);
238 
239  private:
240   static std::map<std::string, Status (*)(nlohmann::json json_obj, std::shared_ptr<TensorOperation> *operation)>
241     func_ptr_;
242 };
243 
244 }  // namespace dataset
245 }  // namespace mindspore
246 
247 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_SERDES_H_
248