• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2019-2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_JSON_DATA_HELPER_H_
17 #define MINDSPORE_CCSRC_MINDDATA_DATASET_JSON_DATA_HELPER_H_
18 
19 #include <fstream>
20 #include <iostream>
21 #include <map>
22 #include <memory>
23 #include <sstream>
24 #include <string>
25 #include <unordered_map>
26 #include <vector>
27 #include <nlohmann/json.hpp>
28 #include "./securec.h"
29 #include "minddata/dataset/util/log_adapter.h"
30 #include "minddata/dataset/util/path.h"
31 #include "minddata/dataset/util/status.h"
32 
33 namespace mindspore {
34 namespace dataset {
35 
36 /// \brief Simple class to do data manipulation, contains helper function to update json files in dataset
37 class JsonHelper {
38  public:
39   /// \brief constructor
JsonHelper()40   JsonHelper() {}
41 
42   /// \brief Destructor
43   ~JsonHelper() = default;
44 
45   /// \brief Create an Album dataset while taking in a path to a image folder
46   ///     Creates the output directory if doesn't exist
47   /// \param[in] in_dir Image folder directory that takes in images
48   /// \param[in] out_dir Directory containing output json files
49   Status CreateAlbum(const std::string &in_dir, const std::string &out_dir);
50 
51   /// \brief Update a json file field with a vector of integers
52   /// \param in_file The input file name to read in
53   /// \param key Key of field to write to
54   /// \param value Value array to write to file
55   /// \param out_file Optional input for output file path, will write to input file if not specified
56   /// \return Status The status code returned
57   Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<std::string> &value,
58                      const std::string &out_file = "");
59 
60   /// \brief Update a json file field with a vector of type T values
61   /// \param in_file The input file name to read in
62   /// \param key Key of field to write to
63   /// \param value Value array to write to file
64   /// \param out_file Optional parameter for output file path, will write to input file if not specified
65   /// \return Status The status code returned
66   template <typename T>
67   Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<T> &value,
68                      const std::string &out_file = "") {
69     try {
70       Path in = Path(in_file);
71       nlohmann::json js;
72       if (in.Exists()) {
73         RETURN_IF_NOT_OK(RealPath(in_file));
74         try {
75           std::ifstream in_stream(in_file);
76           MS_LOG(INFO) << "Filename: " << in_file << ".";
77           in_stream >> js;
78           in_stream.close();
catch(const std::exception & err)79         } catch (const std::exception &err) {
80           RETURN_STATUS_UNEXPECTED("Invalid file, failed to open json file: " + in_file +
81                                    ", please delete it and try again!");
82         }
83       }
84       js[key] = value;
85       MS_LOG(INFO) << "Write outfile is: " << js << ".";
86 
87       if (out_file == "") {
88         std::ofstream o(in_file, std::ofstream::trunc);
89         o << js;
90         o.close();
91       } else {
92         std::ofstream o(out_file, std::ofstream::trunc);
93         o << js;
94         o.close();
95       }
96     }
97     // Catch any exception and convert to Status return code
catch(const std::exception & err)98     catch (const std::exception &err) {
99       RETURN_STATUS_UNEXPECTED("Update json failed ");
100     }
101     return Status::OK();
102   }
103 
104   /// \brief Update a json file field with a single value of of type T
105   /// \param in_file The input file name to read in
106   /// \param key Key of field to write to
107   /// \param value Value to write to file
108   /// \param out_file Optional parameter for output file path, will write to input file if not specified
109   /// \return Status The status code returned
110   template <typename T>
111   Status UpdateValue(const std::string &in_file, const std::string &key, const T &value,
112                      const std::string &out_file = "") {
113     try {
114       Path in = Path(in_file);
115       nlohmann::json js;
116       if (in.Exists()) {
117         RETURN_IF_NOT_OK(RealPath(in_file));
118         try {
119           std::ifstream in_stream(in_file);
120           MS_LOG(INFO) << "Filename: " << in_file << ".";
121           in_stream >> js;
122           in_stream.close();
catch(const std::exception & err)123         } catch (const std::exception &err) {
124           RETURN_STATUS_UNEXPECTED("Invalid file, failed to open json file: " + in_file +
125                                    ", please delete it and try again!");
126         }
127       }
128       js[key] = value;
129       MS_LOG(INFO) << "Write outfile is: " << js << ".";
130       if (out_file == "") {
131         std::ofstream o(in_file, std::ofstream::trunc);
132         o << js;
133         o.close();
134       } else {
135         std::ofstream o(out_file, std::ofstream::trunc);
136         o << js;
137         o.close();
138       }
139     }
140     // Catch any exception and convert to Status return code
catch(const std::exception & err)141     catch (const std::exception &err) {
142       RETURN_STATUS_UNEXPECTED("Update json failed ");
143     }
144     return Status::OK();
145   }
146 
147   /// \brief Template function to write tensor to file
148   /// \param[in] in_file File to write to
149   /// \param[in] data Array of type T values
150   /// \return Status The status code returned
151   template <typename T>
WriteBinFile(const std::string & in_file,const std::vector<T> & data)152   Status WriteBinFile(const std::string &in_file, const std::vector<T> &data) {
153     try {
154       std::ofstream o(in_file, std::ios::binary | std::ios::out);
155       if (!o.is_open()) {
156         RETURN_STATUS_UNEXPECTED("Error opening Bin file to write");
157       }
158       size_t length = data.size();
159       o.write(reinterpret_cast<const char *>(&data[0]), std::streamsize(length * sizeof(T)));
160       o.close();
161     }
162     // Catch any exception and convert to Status return code
163     catch (const std::exception &err) {
164       RETURN_STATUS_UNEXPECTED("Write bin file failed ");
165     }
166     return Status::OK();
167   }
168 
169   /// \brief Write pointer to bin, use pointer to avoid memcpy
170   /// \param[in] in_file File name to write to
171   /// \param[in] data Pointer to data
172   /// \param[in] length Length of values to write from pointer
173   /// \return Status The status code returned
174   template <typename T>
WriteBinFile(const std::string & in_file,T * data,size_t length)175   Status WriteBinFile(const std::string &in_file, T *data, size_t length) {
176     try {
177       std::string real_in_file;
178       RETURN_IF_NOT_OK(Path::RealPath(in_file, real_in_file));
179       std::ofstream o(real_in_file, std::ios::binary | std::ios::out);
180       if (!o.is_open()) {
181         RETURN_STATUS_UNEXPECTED("Error opening Bin file to write");
182       }
183       o.write(reinterpret_cast<const char *>(data), std::streamsize(length * sizeof(T)));
184       o.close();
185     }
186     // Catch any exception and convert to Status return code
187     catch (const std::exception &err) {
188       RETURN_STATUS_UNEXPECTED("Write bin file failed ");
189     }
190     return Status::OK();
191   }
192 
193   /// \brief Helper function to copy content of a tensor to buffer
194   /// \note This function iterates over the tensor in bytes, since
195   /// \param[in] tensor_addr The memory held by a tensor, e.g. tensor->GetBuffer()
196   /// \param[in] tensor_size The amount of data in bytes in tensor_addr, e.g. tensor->SizeInBytes()
197   /// \param[out] addr The address to copy tensor data to
198   /// \param[in] buffer_size The buffer size of addr
199   /// \return The size of the tensor (bytes copied
200   size_t DumpData(const unsigned char *tensor_addr, const size_t &tensor_size, void *addr, const size_t &buffer_size);
201 
202   /// \brief Helper function to delete key in json file
203   /// \note This function will return okay even if key not found
204   /// \param[in] in_file Json file to remove key from
205   /// \param[in] key The key to remove
206   /// \return Status The status code returned
207   Status RemoveKey(const std::string &in_file, const std::string &key, const std::string &out_file = "");
208 
209   /// \brief A print method typically used for debugging
210   /// \param out - The output stream to write output to
211   void Print(std::ostream &out) const;
212 
213   /// \brief Helper function to check real path
214   /// \note This function will return okay even if key not found
215   /// \param[in] path Path to Json file
216   /// \return Status The status code returned
217   Status RealPath(const std::string &path);
218 
219   /// \brief << Stream output operator overload
220   /// \note This allows you to write the debug print info using stream operators
221   /// \param out Reference to the output stream being overloaded
222   /// \param dh Reference to the DataSchema to display
223   /// \return The output stream must be returned
224   friend std::ostream &operator<<(std::ostream &out, const JsonHelper &dh) {
225     dh.Print(out);
226     return out;
227   }
228 };
229 }  // namespace dataset
230 }  // namespace mindspore
231 
232 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_JSON_DATA_HELPER_H_
233