1 /** 2 * Copyright 2021 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_DATA_HELPER_H_ 17 #define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_DATA_HELPER_H_ 18 19 #include <sys/stat.h> 20 #include <fstream> 21 #include <iostream> 22 #include <map> 23 #include <memory> 24 #include <sstream> 25 #include <string> 26 #include <unordered_map> 27 #include <vector> 28 29 #include "include/api/dual_abi_helper.h" 30 #include "include/api/status.h" 31 32 namespace mindspore { 33 namespace dataset { 34 35 /// \brief Simple class to do data manipulation, contains helper function to update json files in dataset 36 class DataHelper { 37 public: 38 /// \brief constructor DataHelper()39 DataHelper() {} 40 41 /// \brief Destructor 42 ~DataHelper() = default; 43 44 /// \brief Create an Album dataset while taking in a path to a image folder 45 /// Creates the output directory if doesn't exist 46 /// \param[in] in_dir Image folder directory that takes in images 47 /// \param[in] out_dir Directory containing output json files 48 /// \return Status The status code returned CreateAlbum(const std::string & in_dir,const std::string & out_dir)49 Status CreateAlbum(const std::string &in_dir, const std::string &out_dir) { 50 return CreateAlbumIF(StringToChar(in_dir), StringToChar(out_dir)); 51 } 52 53 /// \brief Update a json file field with a vector of string values 54 /// \param in_file The input file name to read in 55 /// \param key Key of field to write to 56 /// \param value Value array to write to file 57 /// \param out_file Optional input for output file path, will write to input file if not specified 58 /// \return Status The status code returned 59 Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<std::string> &value, 60 const std::string &out_file = "") { 61 return UpdateArrayIF(StringToChar(in_file), StringToChar(key), VectorStringToChar(value), StringToChar(out_file)); 62 } 63 64 /// \brief Update a json file field with a vector of bool values 65 /// \param in_file The input file name to read in 66 /// \param key Key of field to write to 67 /// \param value Value array to write to file 68 /// \param out_file Optional parameter for output file path, will write to input file if not specified 69 /// \return Status The status code returned 70 Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<bool> &value, 71 const std::string &out_file = "") { 72 return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file)); 73 } 74 75 /// \brief Update a json file field with a vector of int8 values 76 /// \param in_file The input file name to read in 77 /// \param key Key of field to write to 78 /// \param value Value array to write to file 79 /// \param out_file Optional parameter for output file path, will write to input file if not specified 80 /// \return Status The status code returned 81 Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<int8_t> &value, 82 const std::string &out_file = "") { 83 return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file)); 84 } 85 86 /// \brief Update a json file field with a vector of uint8 values 87 /// \param in_file The input file name to read in 88 /// \param key Key of field to write to 89 /// \param value Value array to write to file 90 /// \param out_file Optional parameter for output file path, will write to input file if not specified 91 /// \return Status The status code returned 92 Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<uint8_t> &value, 93 const std::string &out_file = "") { 94 return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file)); 95 } 96 97 /// \brief Update a json file field with a vector of int16 values 98 /// \param in_file The input file name to read in 99 /// \param key Key of field to write to 100 /// \param value Value array to write to file 101 /// \param out_file Optional parameter for output file path, will write to input file if not specified 102 /// \return Status The status code returned 103 Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<int16_t> &value, 104 const std::string &out_file = "") { 105 return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file)); 106 } 107 108 /// \brief Update a json file field with a vector of uint16 values 109 /// \param in_file The input file name to read in 110 /// \param key Key of field to write to 111 /// \param value Value array to write to file 112 /// \param out_file Optional parameter for output file path, will write to input file if not specified 113 /// \return Status The status code returned 114 Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<uint16_t> &value, 115 const std::string &out_file = "") { 116 return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file)); 117 } 118 119 /// \brief Update a json file field with a vector of int32 values 120 /// \param in_file The input file name to read in 121 /// \param key Key of field to write to 122 /// \param value Value array to write to file 123 /// \param out_file Optional parameter for output file path, will write to input file if not specified 124 /// \return Status The status code returned 125 Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<int32_t> &value, 126 const std::string &out_file = "") { 127 return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file)); 128 } 129 130 /// \brief Update a json file field with a vector of uint32 values 131 /// \param in_file The input file name to read in 132 /// \param key Key of field to write to 133 /// \param value Value array to write to file 134 /// \param out_file Optional parameter for output file path, will write to input file if not specified 135 /// \return Status The status code returned 136 Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<uint32_t> &value, 137 const std::string &out_file = "") { 138 return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file)); 139 } 140 141 /// \brief Update a json file field with a vector of int64 values 142 /// \param in_file The input file name to read in 143 /// \param key Key of field to write to 144 /// \param value Value array to write to file 145 /// \param out_file Optional parameter for output file path, will write to input file if not specified 146 /// \return Status The status code returned 147 Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<int64_t> &value, 148 const std::string &out_file = "") { 149 return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file)); 150 } 151 152 /// \brief Update a json file field with a vector of uint64 values 153 /// \param in_file The input file name to read in 154 /// \param key Key of field to write to 155 /// \param value Value array to write to file 156 /// \param out_file Optional parameter for output file path, will write to input file if not specified 157 /// \return Status The status code returned 158 Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<uint64_t> &value, 159 const std::string &out_file = "") { 160 return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file)); 161 } 162 163 /// \brief Update a json file field with a vector of float values 164 /// \param in_file The input file name to read in 165 /// \param key Key of field to write to 166 /// \param value Value array to write to file 167 /// \param out_file Optional parameter for output file path, will write to input file if not specified 168 /// \return Status The status code returned 169 Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<float> &value, 170 const std::string &out_file = "") { 171 return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file)); 172 } 173 174 /// \brief Update a json file field with a vector of double values 175 /// \param in_file The input file name to read in 176 /// \param key Key of field to write to 177 /// \param value Value array to write to file 178 /// \param out_file Optional parameter for output file path, will write to input file if not specified 179 /// \return Status The status code returned 180 Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<double> &value, 181 const std::string &out_file = "") { 182 return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file)); 183 } 184 185 /// \brief Update a json file field with a string value 186 /// \param in_file The input file name to read in 187 /// \param key Key of field to write to 188 /// \param value Value to write to file 189 /// \param out_file Optional parameter for output file path, will write to input file if not specified 190 /// \return Status The status code returned 191 Status UpdateValue(const std::string &in_file, const std::string &key, const std::string &value, 192 const std::string &out_file = "") { 193 return UpdateValueIF(StringToChar(in_file), StringToChar(key), StringToChar(value), StringToChar(out_file)); 194 } 195 196 /// \brief Update a json file field with a bool value 197 /// \param in_file The input file name to read in 198 /// \param key Key of field to write to 199 /// \param value Value to write to file 200 /// \param out_file Optional parameter for output file path, will write to input file if not specified 201 /// \return Status The status code returned 202 Status UpdateValue(const std::string &in_file, const std::string &key, const bool &value, 203 const std::string &out_file = "") { 204 return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file)); 205 } 206 207 /// \brief Update a json file field with an int8 value 208 /// \param in_file The input file name to read in 209 /// \param key Key of field to write to 210 /// \param value Value to write to file 211 /// \param out_file Optional parameter for output file path, will write to input file if not specified 212 /// \return Status The status code returned 213 Status UpdateValue(const std::string &in_file, const std::string &key, const int8_t &value, 214 const std::string &out_file = "") { 215 return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file)); 216 } 217 218 /// \brief Update a json file field with an uint8 value 219 /// \param in_file The input file name to read in 220 /// \param key Key of field to write to 221 /// \param value Value to write to file 222 /// \param out_file Optional parameter for output file path, will write to input file if not specified 223 /// \return Status The status code returned 224 Status UpdateValue(const std::string &in_file, const std::string &key, const uint8_t &value, 225 const std::string &out_file = "") { 226 return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file)); 227 } 228 229 /// \brief Update a json file field with an int16 value 230 /// \param in_file The input file name to read in 231 /// \param key Key of field to write to 232 /// \param value Value to write to file 233 /// \param out_file Optional parameter for output file path, will write to input file if not specified 234 /// \return Status The status code returned 235 Status UpdateValue(const std::string &in_file, const std::string &key, const int16_t &value, 236 const std::string &out_file = "") { 237 return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file)); 238 } 239 240 /// \brief Update a json file field with an uint16 value 241 /// \param in_file The input file name to read in 242 /// \param key Key of field to write to 243 /// \param value Value to write to file 244 /// \param out_file Optional parameter for output file path, will write to input file if not specified 245 /// \return Status The status code returned 246 Status UpdateValue(const std::string &in_file, const std::string &key, const uint16_t &value, 247 const std::string &out_file = "") { 248 return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file)); 249 } 250 251 /// \brief Update a json file field with an int32 value 252 /// \param in_file The input file name to read in 253 /// \param key Key of field to write to 254 /// \param value Value to write to file 255 /// \param out_file Optional parameter for output file path, will write to input file if not specified 256 /// \return Status The status code returned 257 Status UpdateValue(const std::string &in_file, const std::string &key, const int32_t &value, 258 const std::string &out_file = "") { 259 return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file)); 260 } 261 262 /// \brief Update a json file field with an uint32 value 263 /// \param in_file The input file name to read in 264 /// \param key Key of field to write to 265 /// \param value Value to write to file 266 /// \param out_file Optional parameter for output file path, will write to input file if not specified 267 /// \return Status The status code returned 268 Status UpdateValue(const std::string &in_file, const std::string &key, const uint32_t &value, 269 const std::string &out_file = "") { 270 return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file)); 271 } 272 273 /// \brief Update a json file field with an int64 value 274 /// \param in_file The input file name to read in 275 /// \param key Key of field to write to 276 /// \param value Value to write to file 277 /// \param out_file Optional parameter for output file path, will write to input file if not specified 278 /// \return Status The status code returned 279 Status UpdateValue(const std::string &in_file, const std::string &key, const int64_t &value, 280 const std::string &out_file = "") { 281 return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file)); 282 } 283 284 /// \brief Update a json file field with an uint64 value 285 /// \param in_file The input file name to read in 286 /// \param key Key of field to write to 287 /// \param value Value to write to file 288 /// \param out_file Optional parameter for output file path, will write to input file if not specified 289 /// \return Status The status code returned 290 Status UpdateValue(const std::string &in_file, const std::string &key, const uint64_t &value, 291 const std::string &out_file = "") { 292 return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file)); 293 } 294 295 /// \brief Update a json file field with a float value 296 /// \param in_file The input file name to read in 297 /// \param key Key of field to write to 298 /// \param value Value to write to file 299 /// \param out_file Optional parameter for output file path, will write to input file if not specified 300 /// \return Status The status code returned 301 Status UpdateValue(const std::string &in_file, const std::string &key, const float &value, 302 const std::string &out_file = "") { 303 return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file)); 304 } 305 306 /// \brief Update a json file field with a double value 307 /// \param in_file The input file name to read in 308 /// \param key Key of field to write to 309 /// \param value Value to write to file 310 /// \param out_file Optional parameter for output file path, will write to input file if not specified 311 /// \return Status The status code returned 312 Status UpdateValue(const std::string &in_file, const std::string &key, const double &value, 313 const std::string &out_file = "") { 314 return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file)); 315 } 316 317 /// \brief Template function to write tensor to file 318 /// \param[in] in_file File to write to 319 /// \param[in] data Array of type T values 320 /// \return Status The status code returned 321 template <typename T> WriteBinFile(const std::string & in_file,const std::vector<T> & data)322 Status WriteBinFile(const std::string &in_file, const std::vector<T> &data) { 323 try { 324 std::ofstream o(in_file, std::ios::binary | std::ios::out); 325 if (!o.is_open()) { 326 return Status(kMDUnexpectedError, "Error opening Bin file to write"); 327 } 328 size_t length = data.size(); 329 if (length == 0) { 330 return Status(kMDUnexpectedError, "size of data is 0 when written into file."); 331 } 332 o.write(reinterpret_cast<const char *>(&data[0]), std::streamsize(length * sizeof(T))); 333 o.close(); 334 } 335 // Catch any exception and convert to Status return code 336 catch (const std::exception &err) { 337 return Status(kMDUnexpectedError, "Write bin file failed "); 338 } 339 return Status::OK(); 340 } 341 342 /// \brief Write pointer to bin, use pointer to avoid memcpy 343 /// \note The value of `length`` must be equal to the length of `data` 344 /// \param[in] in_file File name to write to 345 /// \param[in] data Pointer to data 346 /// \param[in] length Length of values to write from pointer 347 /// \return Status The status code returned 348 template <typename T> WriteBinFile(const std::string & in_file,T * data,size_t length)349 Status WriteBinFile(const std::string &in_file, T *data, size_t length) { 350 try { 351 if (data == nullptr) { 352 return Status(kMDUnexpectedError, "input data can not be null"); 353 } 354 std::ofstream o(in_file, std::ios::binary | std::ios::out); 355 if (!o.is_open()) { 356 return Status(kMDUnexpectedError, "Error opening Bin file to write"); 357 } 358 o.write(reinterpret_cast<const char *>(data), std::streamsize(length * sizeof(T))); 359 if (!o.good()) { 360 return Status(kMDUnexpectedError, "Error writing Bin file"); 361 } 362 o.close(); 363 } 364 // Catch any exception and convert to Status return code 365 catch (const std::exception &err) { 366 return Status(kMDUnexpectedError, "Write bin file failed"); 367 } 368 return Status::OK(); 369 } 370 371 /// \brief Helper function to copy content of a tensor to buffer 372 /// \note This function iterates over the tensor in bytes, since 373 /// \param[in] tensor_addr The memory held by a tensor 374 /// \param[in] tensor_size The amount of data in bytes in tensor_addr, e.g. tensor->SizeInBytes() 375 /// \param[out] addr The address to copy tensor data to 376 /// \param[in] buffer_size The buffer size of addr 377 /// \return The size of the tensor (bytes copied 378 size_t DumpData(const unsigned char *tensor_addr, const size_t &tensor_size, void *addr, const size_t &buffer_size); 379 380 /// \brief Helper function to delete key in json file 381 /// \note This function will return okay even if key not found 382 /// \param[in] in_file Json file to remove key from 383 /// \param[in] key The key to remove 384 /// \return Status The status code returned 385 Status RemoveKey(const std::string &in_file, const std::string &key, const std::string &out_file = "") { 386 return RemoveKeyIF(StringToChar(in_file), StringToChar(key), StringToChar(out_file)); 387 } 388 389 /// \brief A print method typically used for debugging 390 /// \param out - The output stream to write output to 391 void Print(std::ostream &out) const; 392 393 /// \brief << Stream output operator overload 394 /// \note This allows you to write the debug print info using stream operators 395 /// \param out Reference to the output stream being overloaded 396 /// \param dh Reference to the DataSchema to display 397 /// \return The output stream must be returned 398 friend std::ostream &operator<<(std::ostream &out, const DataHelper &dh) { 399 dh.Print(out); 400 return out; 401 } 402 403 private: 404 // Helper function for dual ABI support 405 Status CreateAlbumIF(const std::vector<char> &in_dir, const std::vector<char> &out_dir); 406 Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key, 407 const std::vector<std::vector<char>> &value, const std::vector<char> &out_file); 408 Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key, const std::vector<bool> &value, 409 const std::vector<char> &out_file); 410 Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key, const std::vector<int8_t> &value, 411 const std::vector<char> &out_file); 412 Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key, 413 const std::vector<uint8_t> &value, const std::vector<char> &out_file); 414 Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key, 415 const std::vector<int16_t> &value, const std::vector<char> &out_file); 416 Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key, 417 const std::vector<uint16_t> &value, const std::vector<char> &out_file); 418 Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key, 419 const std::vector<int32_t> &value, const std::vector<char> &out_file); 420 Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key, 421 const std::vector<uint32_t> &value, const std::vector<char> &out_file); 422 Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key, 423 const std::vector<int64_t> &value, const std::vector<char> &out_file); 424 Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key, 425 const std::vector<uint64_t> &value, const std::vector<char> &out_file); 426 Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key, const std::vector<float> &value, 427 const std::vector<char> &out_file); 428 Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key, const std::vector<double> &value, 429 const std::vector<char> &out_file); 430 431 Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const std::vector<char> &value, 432 const std::vector<char> &out_file); 433 Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const bool &value, 434 const std::vector<char> &out_file); 435 Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const int8_t &value, 436 const std::vector<char> &out_file); 437 Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const uint8_t &value, 438 const std::vector<char> &out_file); 439 Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const int16_t &value, 440 const std::vector<char> &out_file); 441 Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const uint16_t &value, 442 const std::vector<char> &out_file); 443 Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const int32_t &value, 444 const std::vector<char> &out_file); 445 Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const uint32_t &value, 446 const std::vector<char> &out_file); 447 Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const int64_t &value, 448 const std::vector<char> &out_file); 449 Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const uint64_t &value, 450 const std::vector<char> &out_file); 451 Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const float &value, 452 const std::vector<char> &out_file); 453 Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const double &value, 454 const std::vector<char> &out_file); 455 Status RemoveKeyIF(const std::vector<char> &in_file, const std::vector<char> &key, const std::vector<char> &out_file); 456 }; 457 } // namespace dataset 458 } // namespace mindspore 459 460 #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_DATA_HELPER_H_ 461