1 /** 2 * Copyright 2019-2021 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_JSON_DATA_HELPER_H_ 17 #define MINDSPORE_CCSRC_MINDDATA_DATASET_JSON_DATA_HELPER_H_ 18 19 #include <fstream> 20 #include <iostream> 21 #include <map> 22 #include <memory> 23 #include <sstream> 24 #include <string> 25 #include <unordered_map> 26 #include <vector> 27 #include <nlohmann/json.hpp> 28 #include "./securec.h" 29 #include "minddata/dataset/util/log_adapter.h" 30 #include "minddata/dataset/util/path.h" 31 #include "minddata/dataset/util/status.h" 32 33 namespace mindspore { 34 namespace dataset { 35 36 /// \brief Simple class to do data manipulation, contains helper function to update json files in dataset 37 class JsonHelper { 38 public: 39 /// \brief constructor JsonHelper()40 JsonHelper() {} 41 42 /// \brief Destructor 43 ~JsonHelper() = default; 44 45 /// \brief Create an Album dataset while taking in a path to a image folder 46 /// Creates the output directory if doesn't exist 47 /// \param[in] in_dir Image folder directory that takes in images 48 /// \param[in] out_dir Directory containing output json files 49 Status CreateAlbum(const std::string &in_dir, const std::string &out_dir); 50 51 /// \brief Update a json file field with a vector of integers 52 /// \param in_file The input file name to read in 53 /// \param key Key of field to write to 54 /// \param value Value array to write to file 55 /// \param out_file Optional input for output file path, will write to input file if not specified 56 /// \return Status The status code returned 57 Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<std::string> &value, 58 const std::string &out_file = ""); 59 60 /// \brief Update a json file field with a vector of type T values 61 /// \param in_file The input file name to read in 62 /// \param key Key of field to write to 63 /// \param value Value array to write to file 64 /// \param out_file Optional parameter for output file path, will write to input file if not specified 65 /// \return Status The status code returned 66 template <typename T> 67 Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<T> &value, 68 const std::string &out_file = "") { 69 try { 70 Path in = Path(in_file); 71 nlohmann::json js; 72 if (in.Exists()) { 73 RETURN_IF_NOT_OK(RealPath(in_file)); 74 try { 75 std::ifstream in_stream(in_file); 76 MS_LOG(INFO) << "Filename: " << in_file << "."; 77 in_stream >> js; 78 in_stream.close(); catch(const std::exception & err)79 } catch (const std::exception &err) { 80 RETURN_STATUS_UNEXPECTED("Invalid file, failed to open json file: " + in_file + 81 ", please delete it and try again!"); 82 } 83 } 84 js[key] = value; 85 MS_LOG(INFO) << "Write outfile is: " << js << "."; 86 87 if (out_file == "") { 88 std::ofstream o(in_file, std::ofstream::trunc); 89 o << js; 90 o.close(); 91 } else { 92 std::ofstream o(out_file, std::ofstream::trunc); 93 o << js; 94 o.close(); 95 } 96 } 97 // Catch any exception and convert to Status return code catch(const std::exception & err)98 catch (const std::exception &err) { 99 RETURN_STATUS_UNEXPECTED("Update json failed "); 100 } 101 return Status::OK(); 102 } 103 104 /// \brief Update a json file field with a single value of of type T 105 /// \param in_file The input file name to read in 106 /// \param key Key of field to write to 107 /// \param value Value to write to file 108 /// \param out_file Optional parameter for output file path, will write to input file if not specified 109 /// \return Status The status code returned 110 template <typename T> 111 Status UpdateValue(const std::string &in_file, const std::string &key, const T &value, 112 const std::string &out_file = "") { 113 try { 114 Path in = Path(in_file); 115 nlohmann::json js; 116 if (in.Exists()) { 117 RETURN_IF_NOT_OK(RealPath(in_file)); 118 try { 119 std::ifstream in_stream(in_file); 120 MS_LOG(INFO) << "Filename: " << in_file << "."; 121 in_stream >> js; 122 in_stream.close(); catch(const std::exception & err)123 } catch (const std::exception &err) { 124 RETURN_STATUS_UNEXPECTED("Invalid file, failed to open json file: " + in_file + 125 ", please delete it and try again!"); 126 } 127 } 128 js[key] = value; 129 MS_LOG(INFO) << "Write outfile is: " << js << "."; 130 if (out_file == "") { 131 std::ofstream o(in_file, std::ofstream::trunc); 132 o << js; 133 o.close(); 134 } else { 135 std::ofstream o(out_file, std::ofstream::trunc); 136 o << js; 137 o.close(); 138 } 139 } 140 // Catch any exception and convert to Status return code catch(const std::exception & err)141 catch (const std::exception &err) { 142 RETURN_STATUS_UNEXPECTED("Update json failed "); 143 } 144 return Status::OK(); 145 } 146 147 /// \brief Template function to write tensor to file 148 /// \param[in] in_file File to write to 149 /// \param[in] data Array of type T values 150 /// \return Status The status code returned 151 template <typename T> WriteBinFile(const std::string & in_file,const std::vector<T> & data)152 Status WriteBinFile(const std::string &in_file, const std::vector<T> &data) { 153 try { 154 std::ofstream o(in_file, std::ios::binary | std::ios::out); 155 if (!o.is_open()) { 156 RETURN_STATUS_UNEXPECTED("Error opening Bin file to write"); 157 } 158 size_t length = data.size(); 159 o.write(reinterpret_cast<const char *>(&data[0]), std::streamsize(length * sizeof(T))); 160 o.close(); 161 } 162 // Catch any exception and convert to Status return code 163 catch (const std::exception &err) { 164 RETURN_STATUS_UNEXPECTED("Write bin file failed "); 165 } 166 return Status::OK(); 167 } 168 169 /// \brief Write pointer to bin, use pointer to avoid memcpy 170 /// \param[in] in_file File name to write to 171 /// \param[in] data Pointer to data 172 /// \param[in] length Length of values to write from pointer 173 /// \return Status The status code returned 174 template <typename T> WriteBinFile(const std::string & in_file,T * data,size_t length)175 Status WriteBinFile(const std::string &in_file, T *data, size_t length) { 176 try { 177 std::string real_in_file; 178 RETURN_IF_NOT_OK(Path::RealPath(in_file, real_in_file)); 179 std::ofstream o(real_in_file, std::ios::binary | std::ios::out); 180 if (!o.is_open()) { 181 RETURN_STATUS_UNEXPECTED("Error opening Bin file to write"); 182 } 183 o.write(reinterpret_cast<const char *>(data), std::streamsize(length * sizeof(T))); 184 o.close(); 185 } 186 // Catch any exception and convert to Status return code 187 catch (const std::exception &err) { 188 RETURN_STATUS_UNEXPECTED("Write bin file failed "); 189 } 190 return Status::OK(); 191 } 192 193 /// \brief Helper function to copy content of a tensor to buffer 194 /// \note This function iterates over the tensor in bytes, since 195 /// \param[in] tensor_addr The memory held by a tensor, e.g. tensor->GetBuffer() 196 /// \param[in] tensor_size The amount of data in bytes in tensor_addr, e.g. tensor->SizeInBytes() 197 /// \param[out] addr The address to copy tensor data to 198 /// \param[in] buffer_size The buffer size of addr 199 /// \return The size of the tensor (bytes copied 200 size_t DumpData(const unsigned char *tensor_addr, const size_t &tensor_size, void *addr, const size_t &buffer_size); 201 202 /// \brief Helper function to delete key in json file 203 /// \note This function will return okay even if key not found 204 /// \param[in] in_file Json file to remove key from 205 /// \param[in] key The key to remove 206 /// \return Status The status code returned 207 Status RemoveKey(const std::string &in_file, const std::string &key, const std::string &out_file = ""); 208 209 /// \brief A print method typically used for debugging 210 /// \param out - The output stream to write output to 211 void Print(std::ostream &out) const; 212 213 /// \brief Helper function to check real path 214 /// \note This function will return okay even if key not found 215 /// \param[in] path Path to Json file 216 /// \return Status The status code returned 217 Status RealPath(const std::string &path); 218 219 /// \brief << Stream output operator overload 220 /// \note This allows you to write the debug print info using stream operators 221 /// \param out Reference to the output stream being overloaded 222 /// \param dh Reference to the DataSchema to display 223 /// \return The output stream must be returned 224 friend std::ostream &operator<<(std::ostream &out, const JsonHelper &dh) { 225 dh.Print(out); 226 return out; 227 } 228 }; 229 } // namespace dataset 230 } // namespace mindspore 231 232 #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_JSON_HELPER_H_ 233