1 /** 2 * Copyright 2021 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_SERDES_H_ 17 #define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_SERDES_H_ 18 19 #include <algorithm> 20 #include <cstring> 21 #include <fstream> 22 #include <iostream> 23 #include <map> 24 #include <memory> 25 #include <set> 26 #include <string> 27 #include <vector> 28 #include <unordered_set> 29 #include <utility> 30 #include <nlohmann/json.hpp> 31 32 #include "minddata/dataset/core/tensor.h" 33 34 #include "minddata/dataset/engine/ir/datasetops/batch_node.h" 35 #include "minddata/dataset/engine/ir/datasetops/concat_node.h" 36 #include "minddata/dataset/engine/ir/datasetops/dataset_node.h" 37 #include "minddata/dataset/engine/ir/datasetops/map_node.h" 38 #include "minddata/dataset/engine/ir/datasetops/project_node.h" 39 #include "minddata/dataset/engine/ir/datasetops/rename_node.h" 40 #include "minddata/dataset/engine/ir/datasetops/repeat_node.h" 41 #include "minddata/dataset/engine/ir/datasetops/shuffle_node.h" 42 #include "minddata/dataset/engine/ir/datasetops/skip_node.h" 43 #include "minddata/dataset/engine/ir/datasetops/transfer_node.h" 44 #include "minddata/dataset/engine/ir/datasetops/take_node.h" 45 #include "minddata/dataset/engine/ir/datasetops/zip_node.h" 46 47 #include "minddata/dataset/engine/ir/datasetops/source/album_node.h" 48 #include "minddata/dataset/engine/ir/datasetops/source/celeba_node.h" 49 #include "minddata/dataset/engine/ir/datasetops/source/cifar10_node.h" 50 #include "minddata/dataset/engine/ir/datasetops/source/cifar100_node.h" 51 #include "minddata/dataset/engine/ir/datasetops/source/clue_node.h" 52 #include "minddata/dataset/engine/ir/datasetops/source/coco_node.h" 53 #include "minddata/dataset/engine/ir/datasetops/source/csv_node.h" 54 #include "minddata/dataset/engine/ir/datasetops/source/flickr_node.h" 55 #include "minddata/dataset/engine/ir/datasetops/source/image_folder_node.h" 56 #include "minddata/dataset/engine/ir/datasetops/source/manifest_node.h" 57 #include "minddata/dataset/engine/ir/datasetops/source/mnist_node.h" 58 #include "minddata/dataset/engine/ir/datasetops/source/text_file_node.h" 59 #include "minddata/dataset/engine/ir/datasetops/source/tf_record_node.h" 60 #include "minddata/dataset/engine/ir/datasetops/source/voc_node.h" 61 62 #include "minddata/dataset/engine/ir/datasetops/source/samplers/distributed_sampler_ir.h" 63 #include "minddata/dataset/engine/ir/datasetops/source/samplers/pk_sampler_ir.h" 64 #include "minddata/dataset/engine/ir/datasetops/source/samplers/prebuilt_sampler_ir.h" 65 #include "minddata/dataset/engine/ir/datasetops/source/samplers/random_sampler_ir.h" 66 #include "minddata/dataset/engine/ir/datasetops/source/samplers/samplers_ir.h" 67 #include "minddata/dataset/engine/ir/datasetops/source/samplers/sequential_sampler_ir.h" 68 #include "minddata/dataset/engine/ir/datasetops/source/samplers/subset_random_sampler_ir.h" 69 #include "minddata/dataset/engine/ir/datasetops/source/samplers/subset_sampler_ir.h" 70 #include "minddata/dataset/engine/ir/datasetops/source/samplers/weighted_random_sampler_ir.h" 71 72 #include "minddata/dataset/include/dataset/constants.h" 73 #include "minddata/dataset/include/dataset/datasets.h" 74 #include "minddata/dataset/include/dataset/execute.h" 75 #include "minddata/dataset/include/dataset/iterator.h" 76 #include "minddata/dataset/include/dataset/samplers.h" 77 #include "minddata/dataset/include/dataset/transforms.h" 78 #include "minddata/dataset/include/dataset/vision.h" 79 80 #include "minddata/dataset/kernels/py_func_op.h" 81 #include "minddata/dataset/kernels/ir/data/transforms_ir.h" 82 #include "minddata/dataset/kernels/ir/vision/adjust_gamma_ir.h" 83 #include "minddata/dataset/kernels/ir/vision/affine_ir.h" 84 #include "minddata/dataset/kernels/ir/vision/ascend_vision_ir.h" 85 #include "minddata/dataset/kernels/ir/vision/auto_contrast_ir.h" 86 #include "minddata/dataset/kernels/ir/vision/bounding_box_augment_ir.h" 87 #include "minddata/dataset/kernels/ir/vision/center_crop_ir.h" 88 #include "minddata/dataset/kernels/ir/vision/crop_ir.h" 89 #include "minddata/dataset/kernels/ir/vision/cutmix_batch_ir.h" 90 #include "minddata/dataset/kernels/ir/vision/cutout_ir.h" 91 #include "minddata/dataset/kernels/ir/vision/decode_ir.h" 92 #include "minddata/dataset/kernels/ir/vision/equalize_ir.h" 93 #include "minddata/dataset/kernels/ir/vision/gaussian_blur_ir.h" 94 #include "minddata/dataset/kernels/ir/vision/horizontal_flip_ir.h" 95 #include "minddata/dataset/kernels/ir/vision/hwc_to_chw_ir.h" 96 #include "minddata/dataset/kernels/ir/vision/invert_ir.h" 97 #include "minddata/dataset/kernels/ir/vision/mixup_batch_ir.h" 98 #include "minddata/dataset/kernels/ir/vision/normalize_ir.h" 99 #include "minddata/dataset/kernels/ir/vision/normalize_pad_ir.h" 100 #include "minddata/dataset/kernels/ir/vision/pad_ir.h" 101 #include "minddata/dataset/kernels/ir/vision/random_affine_ir.h" 102 #include "minddata/dataset/kernels/ir/vision/random_color_adjust_ir.h" 103 #include "minddata/dataset/kernels/ir/vision/random_color_ir.h" 104 #include "minddata/dataset/kernels/ir/vision/random_crop_decode_resize_ir.h" 105 #include "minddata/dataset/kernels/ir/vision/random_crop_ir.h" 106 #include "minddata/dataset/kernels/ir/vision/random_crop_with_bbox_ir.h" 107 #include "minddata/dataset/kernels/ir/vision/random_horizontal_flip_ir.h" 108 #include "minddata/dataset/kernels/ir/vision/random_horizontal_flip_with_bbox_ir.h" 109 #include "minddata/dataset/kernels/ir/vision/random_posterize_ir.h" 110 #include "minddata/dataset/kernels/ir/vision/random_resized_crop_ir.h" 111 #include "minddata/dataset/kernels/ir/vision/random_resized_crop_with_bbox_ir.h" 112 #include "minddata/dataset/kernels/ir/vision/random_resize_ir.h" 113 #include "minddata/dataset/kernels/ir/vision/random_resize_with_bbox_ir.h" 114 #include "minddata/dataset/kernels/ir/vision/random_rotation_ir.h" 115 #include "minddata/dataset/kernels/ir/vision/random_select_subpolicy_ir.h" 116 #include "minddata/dataset/kernels/ir/vision/random_sharpness_ir.h" 117 #include "minddata/dataset/kernels/ir/vision/random_solarize_ir.h" 118 #include "minddata/dataset/kernels/ir/vision/random_vertical_flip_ir.h" 119 #include "minddata/dataset/kernels/ir/vision/random_vertical_flip_with_bbox_ir.h" 120 #include "minddata/dataset/kernels/ir/vision/rescale_ir.h" 121 #include "minddata/dataset/kernels/ir/vision/resize_ir.h" 122 #include "minddata/dataset/kernels/ir/vision/resize_preserve_ar_ir.h" 123 #include "minddata/dataset/kernels/ir/vision/resize_with_bbox_ir.h" 124 #include "minddata/dataset/kernels/ir/vision/rgba_to_bgr_ir.h" 125 #include "minddata/dataset/kernels/ir/vision/rgba_to_rgb_ir.h" 126 #include "minddata/dataset/kernels/ir/vision/rgb_to_bgr_ir.h" 127 #include "minddata/dataset/kernels/ir/vision/rgb_to_gray_ir.h" 128 #include "minddata/dataset/kernels/ir/vision/rotate_ir.h" 129 #include "minddata/dataset/kernels/ir/vision/slice_patches_ir.h" 130 #include "minddata/dataset/kernels/ir/vision/softdvpp_decode_random_crop_resize_jpeg_ir.h" 131 #include "minddata/dataset/kernels/ir/vision/softdvpp_decode_resize_jpeg_ir.h" 132 #include "minddata/dataset/kernels/ir/vision/swap_red_blue_ir.h" 133 #include "minddata/dataset/kernels/ir/vision/uniform_aug_ir.h" 134 #include "minddata/dataset/kernels/ir/vision/vertical_flip_ir.h" 135 #include "minddata/dataset/text/ir/kernels/text_ir.h" 136 #include "minddata/dataset/util/status.h" 137 138 namespace mindspore { 139 namespace dataset { 140 /// \brief The Serdes class is used to serialize an IR tree into JSON string and dump into file if file name 141 /// specified. 142 class Serdes { 143 public: 144 /// \brief Constructor Serdes()145 Serdes() {} 146 147 /// \brief default destructor 148 ~Serdes() = default; 149 150 /// \brief function to serialize IR tree into JSON string and/or JSON file 151 /// \param[in] node IR node to be transferred 152 /// \param[in] filename The file name. If specified, save the generated JSON string into the file 153 /// \param[out] out_json The result json string 154 /// \return Status The status code returned 155 static Status SaveToJSON(std::shared_ptr<DatasetNode> node, const std::string &filename, nlohmann::json *out_json); 156 157 /// \brief function to de-serialize JSON file to IR tree 158 /// \param[in] json_filepath input path of json file 159 /// \param[out] ds The deserialized dataset 160 /// \return Status The status code returned 161 static Status Deserialize(const std::string &json_filepath, std::shared_ptr<DatasetNode> *ds); 162 163 /// \brief Helper function to construct IR tree, separate zip and other operations 164 /// \param[in] json_obj The JSON object to be deserialized 165 /// \param[out] ds Shared pointer of a DatasetNode object containing the deserialized IR tree 166 /// \return Status The status code returned 167 static Status ConstructPipeline(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds); 168 169 /// \brief Helper functions for creating sampler, separate different samplers and call the related function 170 /// \param[in] json_obj The JSON object to be deserialized 171 /// \param[out] sampler Deserialized sampler 172 /// \return Status The status code returned 173 static Status ConstructSampler(nlohmann::json json_obj, std::shared_ptr<SamplerObj> *sampler); 174 175 /// \brief helper function to construct tensor operations 176 /// \param[in] json_obj json object of operations to be deserilized 177 /// \param[out] vector of tensor operation pointer 178 /// \return Status The status code returned 179 static Status ConstructTensorOps(nlohmann::json json_obj, std::vector<std::shared_ptr<TensorOperation>> *result); 180 181 /// \brief helper function to load tensor operations from dataset JSON and construct Execute object. 182 /// \param[in] dataset_json JSON string of dataset. 183 /// \param[in] process_column Select all map operations which process this column. 184 /// \param[out] data_graph Execute object contains tensor operations of map. 185 /// \return Status The status code returned. 186 static Status ParseMindIRPreprocess(const std::string &dataset_json, const std::string &process_column, 187 std::vector<std::shared_ptr<mindspore::dataset::Execute>> *data_graph); 188 189 protected: 190 /// \brief Helper function to save JSON to a file 191 /// \param[in] json_string The JSON string to be saved to the file 192 /// \param[in] file_name The file name 193 /// \return Status The status code returned 194 static Status SaveJSONToFile(nlohmann::json json_string, const std::string &file_name); 195 196 /// \brief Function to determine type of the node - dataset node if no dataset exists or operation node 197 /// \param[in] child_ds children datasets that is already created 198 /// \param[in] json_obj json object to read out type of the node 199 /// \param[out] ds Shared pointer of a DatasetNode object containing the deserialized IR tree 200 /// \return create new node based on the input dataset and type of the operation 201 static Status CreateNode(const std::shared_ptr<DatasetNode> &child_ds, nlohmann::json json_obj, 202 std::shared_ptr<DatasetNode> *ds); 203 204 /// \brief Helper functions for creating dataset nodes, separate different datasets and call the related function 205 /// \param[in] json_obj The JSON object to be deserialized 206 /// \param[in] op_type type of dataset 207 /// \param[out] ds Shared pointer of a DatasetNode object containing the deserialized IR tree 208 /// \return Status The status code returned 209 static Status CreateDatasetNode(const nlohmann::json &json_obj, const std::string &op_type, 210 std::shared_ptr<DatasetNode> *ds); 211 212 /// \brief Helper functions for creating operation nodes, separate different operations and call the related function 213 /// \param[in] json_obj The JSON object to be deserialized 214 /// \param[in] op_type type of dataset 215 /// \param[out] result Shared pointer of a DatasetNode object containing the deserialized IR tree 216 /// \return Status The status code returned 217 static Status CreateDatasetOperationNode(const std::shared_ptr<DatasetNode> &ds, const nlohmann::json &json_obj, 218 const std::string &op_type, std::shared_ptr<DatasetNode> *result); 219 220 /// \brief Helper function to map the function pointers 221 /// \return map of key to function pointer 222 static std::map<std::string, Status (*)(nlohmann::json json_obj, std::shared_ptr<TensorOperation> *operation)> 223 InitializeFuncPtr(); 224 225 private: 226 static std::map<std::string, Status (*)(nlohmann::json json_obj, std::shared_ptr<TensorOperation> *operation)> 227 func_ptr_; 228 }; 229 230 } // namespace dataset 231 } // namespace mindspore 232 233 #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_SERDES_H_ 234