1 /** 2 * Copyright 2021 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_SERDES_H_ 17 #define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_SERDES_H_ 18 19 #include <algorithm> 20 #include <cstring> 21 #include <iostream> 22 #include <map> 23 #include <memory> 24 #include <set> 25 #include <string> 26 #include <vector> 27 #include <unordered_set> 28 #include <utility> 29 #include <nlohmann/json.hpp> 30 31 #include "minddata/dataset/core/tensor.h" 32 33 #include "minddata/dataset/engine/ir/datasetops/batch_node.h" 34 #include "minddata/dataset/engine/ir/datasetops/concat_node.h" 35 #include "minddata/dataset/engine/ir/datasetops/dataset_node.h" 36 #include "minddata/dataset/engine/ir/datasetops/map_node.h" 37 #include "minddata/dataset/engine/ir/datasetops/project_node.h" 38 #include "minddata/dataset/engine/ir/datasetops/rename_node.h" 39 #include "minddata/dataset/engine/ir/datasetops/repeat_node.h" 40 #include "minddata/dataset/engine/ir/datasetops/shuffle_node.h" 41 #include "minddata/dataset/engine/ir/datasetops/skip_node.h" 42 #include "minddata/dataset/engine/ir/datasetops/data_queue_node.h" 43 #include "minddata/dataset/engine/ir/datasetops/take_node.h" 44 #include "minddata/dataset/engine/ir/datasetops/zip_node.h" 45 46 #include "minddata/dataset/engine/ir/datasetops/source/album_node.h" 47 #include "minddata/dataset/engine/ir/datasetops/source/celeba_node.h" 48 #include "minddata/dataset/engine/ir/datasetops/source/cifar10_node.h" 49 #include "minddata/dataset/engine/ir/datasetops/source/cifar100_node.h" 50 #include "minddata/dataset/engine/ir/datasetops/source/clue_node.h" 51 #include "minddata/dataset/engine/ir/datasetops/source/coco_node.h" 52 #include "minddata/dataset/engine/ir/datasetops/source/csv_node.h" 53 #include "minddata/dataset/engine/ir/datasetops/source/flickr_node.h" 54 #include "minddata/dataset/engine/ir/datasetops/source/image_folder_node.h" 55 #include "minddata/dataset/engine/ir/datasetops/source/manifest_node.h" 56 #include "minddata/dataset/engine/ir/datasetops/source/mnist_node.h" 57 #include "minddata/dataset/engine/ir/datasetops/source/text_file_node.h" 58 #include "minddata/dataset/engine/ir/datasetops/source/tf_record_node.h" 59 #include "minddata/dataset/engine/ir/datasetops/source/voc_node.h" 60 61 #include "minddata/dataset/engine/ir/datasetops/source/samplers/distributed_sampler_ir.h" 62 #include "minddata/dataset/engine/ir/datasetops/source/samplers/pk_sampler_ir.h" 63 #include "minddata/dataset/engine/ir/datasetops/source/samplers/prebuilt_sampler_ir.h" 64 #include "minddata/dataset/engine/ir/datasetops/source/samplers/random_sampler_ir.h" 65 #include "minddata/dataset/engine/ir/datasetops/source/samplers/samplers_ir.h" 66 #include "minddata/dataset/engine/ir/datasetops/source/samplers/sequential_sampler_ir.h" 67 #include "minddata/dataset/engine/ir/datasetops/source/samplers/skip_first_epoch_sampler_ir.h" 68 #include "minddata/dataset/engine/ir/datasetops/source/samplers/subset_random_sampler_ir.h" 69 #include "minddata/dataset/engine/ir/datasetops/source/samplers/subset_sampler_ir.h" 70 #include "minddata/dataset/engine/ir/datasetops/source/samplers/weighted_random_sampler_ir.h" 71 72 #include "minddata/dataset/include/dataset/constants.h" 73 #include "minddata/dataset/include/dataset/datasets.h" 74 #include "minddata/dataset/include/dataset/execute.h" 75 #include "minddata/dataset/include/dataset/iterator.h" 76 #include "minddata/dataset/include/dataset/samplers.h" 77 #include "minddata/dataset/include/dataset/transforms.h" 78 #include "minddata/dataset/include/dataset/vision.h" 79 80 #include "minddata/dataset/kernels/py_func_op.h" 81 #include "minddata/dataset/kernels/ir/data/transforms_ir.h" 82 #include "minddata/dataset/kernels/ir/vision/adjust_gamma_ir.h" 83 #include "minddata/dataset/kernels/ir/vision/affine_ir.h" 84 #include "minddata/dataset/kernels/ir/vision/ascend_vision_ir.h" 85 #include "minddata/dataset/kernels/ir/vision/auto_contrast_ir.h" 86 #include "minddata/dataset/kernels/ir/vision/bounding_box_augment_ir.h" 87 #include "minddata/dataset/kernels/ir/vision/center_crop_ir.h" 88 #include "minddata/dataset/kernels/ir/vision/crop_ir.h" 89 #include "minddata/dataset/kernels/ir/vision/cutmix_batch_ir.h" 90 #include "minddata/dataset/kernels/ir/vision/cutout_ir.h" 91 #include "minddata/dataset/kernels/ir/vision/decode_ir.h" 92 #include "minddata/dataset/kernels/ir/vision/equalize_ir.h" 93 #include "minddata/dataset/kernels/ir/vision/gaussian_blur_ir.h" 94 #include "minddata/dataset/kernels/ir/vision/horizontal_flip_ir.h" 95 #include "minddata/dataset/kernels/ir/vision/hwc_to_chw_ir.h" 96 #include "minddata/dataset/kernels/ir/vision/invert_ir.h" 97 #include "minddata/dataset/kernels/ir/vision/mixup_batch_ir.h" 98 #include "minddata/dataset/kernels/ir/vision/normalize_ir.h" 99 #include "minddata/dataset/kernels/ir/vision/normalize_pad_ir.h" 100 #include "minddata/dataset/kernels/ir/vision/pad_ir.h" 101 #include "minddata/dataset/kernels/ir/vision/random_affine_ir.h" 102 #include "minddata/dataset/kernels/ir/vision/random_color_adjust_ir.h" 103 #include "minddata/dataset/kernels/ir/vision/random_color_ir.h" 104 #include "minddata/dataset/kernels/ir/vision/random_crop_decode_resize_ir.h" 105 #include "minddata/dataset/kernels/ir/vision/random_crop_ir.h" 106 #include "minddata/dataset/kernels/ir/vision/random_crop_with_bbox_ir.h" 107 #include "minddata/dataset/kernels/ir/vision/random_horizontal_flip_ir.h" 108 #include "minddata/dataset/kernels/ir/vision/random_horizontal_flip_with_bbox_ir.h" 109 #include "minddata/dataset/kernels/ir/vision/random_posterize_ir.h" 110 #include "minddata/dataset/kernels/ir/vision/random_resized_crop_ir.h" 111 #include "minddata/dataset/kernels/ir/vision/random_resized_crop_with_bbox_ir.h" 112 #include "minddata/dataset/kernels/ir/vision/random_resize_ir.h" 113 #include "minddata/dataset/kernels/ir/vision/random_resize_with_bbox_ir.h" 114 #include "minddata/dataset/kernels/ir/vision/random_rotation_ir.h" 115 #include "minddata/dataset/kernels/ir/vision/random_select_subpolicy_ir.h" 116 #include "minddata/dataset/kernels/ir/vision/random_sharpness_ir.h" 117 #include "minddata/dataset/kernels/ir/vision/random_solarize_ir.h" 118 #include "minddata/dataset/kernels/ir/vision/random_vertical_flip_ir.h" 119 #include "minddata/dataset/kernels/ir/vision/random_vertical_flip_with_bbox_ir.h" 120 #include "minddata/dataset/kernels/ir/vision/rescale_ir.h" 121 #include "minddata/dataset/kernels/ir/vision/resize_ir.h" 122 #include "minddata/dataset/kernels/ir/vision/resize_preserve_ar_ir.h" 123 #include "minddata/dataset/kernels/ir/vision/resize_with_bbox_ir.h" 124 #include "minddata/dataset/kernels/ir/vision/rgba_to_bgr_ir.h" 125 #include "minddata/dataset/kernels/ir/vision/rgba_to_rgb_ir.h" 126 #include "minddata/dataset/kernels/ir/vision/rgb_to_bgr_ir.h" 127 #include "minddata/dataset/kernels/ir/vision/rgb_to_gray_ir.h" 128 #include "minddata/dataset/kernels/ir/vision/rotate_ir.h" 129 #include "minddata/dataset/kernels/ir/vision/slice_patches_ir.h" 130 #include "minddata/dataset/kernels/ir/vision/swap_red_blue_ir.h" 131 #include "minddata/dataset/kernels/ir/vision/to_tensor_ir.h" 132 #include "minddata/dataset/kernels/ir/vision/uniform_aug_ir.h" 133 #include "minddata/dataset/kernels/ir/vision/vertical_flip_ir.h" 134 #include "minddata/dataset/text/ir/kernels/text_ir.h" 135 #include "minddata/dataset/util/status.h" 136 137 namespace mindspore { 138 namespace dataset { 139 /// \brief The Serdes class is used to serialize an IR tree into JSON string and dump into file if file name 140 /// specified. 141 class Serdes { 142 public: 143 /// \brief Constructor Serdes()144 Serdes() {} 145 146 /// \brief default destructor 147 ~Serdes() = default; 148 149 /// \brief function to serialize IR tree into JSON string and/or JSON file 150 /// \param[in] node IR node to be transferred 151 /// \param[in] filename The file name. If specified, save the generated JSON string into the file 152 /// \param[out] out_json The result json string 153 /// \return Status The status code returned 154 static Status SaveToJSON(std::shared_ptr<DatasetNode> node, const std::string &filename, nlohmann::json *out_json); 155 156 /// \brief Function to update the parameters [num_parallel_workers, connector_queue_size] in the serialized JSON 157 /// object of the optimized IR tree 158 /// \param[in, out] serialized_json The optimized ir tree json node 159 /// \param[in] op_map An ID to DatasetOp mapping 160 static Status UpdateOptimizedIRTreeJSON(nlohmann::json *serialized_json, 161 const std::map<int32_t, std::shared_ptr<DatasetOp>> &op_map); 162 163 /// \brief function to de-serialize JSON file to IR tree 164 /// \param[in] json_filepath input path of json file 165 /// \param[out] ds The deserialized dataset 166 /// \return Status The status code returned 167 static Status Deserialize(const std::string &json_filepath, std::shared_ptr<DatasetNode> *ds); 168 169 /// \brief Helper function to construct IR tree, separate zip and other operations 170 /// \param[in] json_obj The JSON object to be deserialized 171 /// \param[out] ds Shared pointer of a DatasetNode object containing the deserialized IR tree 172 /// \return Status The status code returned 173 static Status ConstructPipeline(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds); 174 175 /// \brief Helper functions for creating sampler, separate different samplers and call the related function 176 /// \param[in] json_obj The JSON object to be deserialized 177 /// \param[out] sampler Deserialized sampler 178 /// \return Status The status code returned 179 static Status ConstructSampler(nlohmann::json json_obj, std::shared_ptr<SamplerObj> *sampler); 180 181 /// \brief helper function to construct tensor operations 182 /// \param[in] json_obj json object of operations to be deserilized 183 /// \param[out] vector of tensor operation pointer 184 /// \return Status The status code returned 185 static Status ConstructTensorOps(nlohmann::json json_obj, std::vector<std::shared_ptr<TensorOperation>> *result); 186 187 /// \brief helper function to load tensor operations from dataset JSON and construct Execute object. 188 /// \param[in] map_json_string JSON string of dataset. 189 /// \param[out] data_graph Execute object contains tensor operations of map. 190 /// \return Status The status code returned. 191 static Status ParseMindIRPreprocess(const std::vector<std::string> &map_json_string, 192 std::vector<std::shared_ptr<mindspore::dataset::Execute>> *data_graph); 193 194 /// \brief Helper function to save JSON to a file 195 /// \param[in] json_string The JSON string to be saved to the file 196 /// \param[in] file_name The file name 197 /// \param[in] pretty Flag to control pretty printing of JSON string to the file 198 /// \return Status The status code returned 199 static Status SaveJSONToFile(const nlohmann::json &json_string, const std::string &file_name, bool pretty = false); 200 201 protected: 202 /// \brief Function to determine type of the node - dataset node if no dataset exists or operation node 203 /// \param[in] child_ds children datasets that is already created 204 /// \param[in] json_obj json object to read out type of the node 205 /// \param[out] ds Shared pointer of a DatasetNode object containing the deserialized IR tree 206 /// \return create new node based on the input dataset and type of the operation 207 static Status CreateNode(const std::shared_ptr<DatasetNode> &child_ds, nlohmann::json json_obj, 208 std::shared_ptr<DatasetNode> *ds); 209 210 /// \brief Helper functions for creating dataset nodes, separate different datasets and call the related function 211 /// \param[in] json_obj The JSON object to be deserialized 212 /// \param[in] op_type type of dataset 213 /// \param[out] ds Shared pointer of a DatasetNode object containing the deserialized IR tree 214 /// \return Status The status code returned 215 static Status CreateDatasetNode(const nlohmann::json &json_obj, const std::string &op_type, 216 std::shared_ptr<DatasetNode> *ds); 217 218 /// \brief Helper functions for creating operation nodes, separate different operations and call the related function 219 /// \param[in] json_obj The JSON object to be deserialized 220 /// \param[in] op_type type of dataset 221 /// \param[out] result Shared pointer of a DatasetNode object containing the deserialized IR tree 222 /// \return Status The status code returned 223 static Status CreateDatasetOperationNode(const std::shared_ptr<DatasetNode> &ds, const nlohmann::json &json_obj, 224 const std::string &op_type, std::shared_ptr<DatasetNode> *result); 225 226 /// \brief Helper function to map the function pointers 227 /// \return map of key to function pointer 228 static std::map<std::string, Status (*)(nlohmann::json json_obj, std::shared_ptr<TensorOperation> *operation)> 229 InitializeFuncPtr(); 230 231 /// \brief Helper function to perform recursive DFS on the optimized IR tree and to match each IR node with its 232 /// corresponding dataset op 233 /// \param [in, out] serialized_json The optimized ir tree json node 234 /// \param [in, out] op_id The id in execution tree from where to continue the IR Node - DatasetOp matching search 235 /// \param [in] op_map An ID to DatasetOp mapping 236 static Status RecurseUpdateOptimizedIRTreeJSON(nlohmann::json *serialized_json, int32_t *op_id, 237 const std::map<int32_t, std::shared_ptr<DatasetOp>> &op_map); 238 239 private: 240 static std::map<std::string, Status (*)(nlohmann::json json_obj, std::shared_ptr<TensorOperation> *operation)> 241 func_ptr_; 242 }; 243 244 } // namespace dataset 245 } // namespace mindspore 246 247 #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_SERDES_H_ 248