• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_DATA_HELPER_H_
17 #define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_DATA_HELPER_H_
18 
19 #include <sys/stat.h>
20 #include <fstream>
21 #include <iostream>
22 #include <map>
23 #include <memory>
24 #include <sstream>
25 #include <string>
26 #include <unordered_map>
27 #include <vector>
28 
29 #include "include/api/dual_abi_helper.h"
30 #include "include/api/status.h"
31 
32 namespace mindspore {
33 namespace dataset {
34 
35 /// \brief Simple class to do data manipulation, contains helper function to update json files in dataset
36 class DataHelper {
37  public:
38   /// \brief constructor
DataHelper()39   DataHelper() {}
40 
41   /// \brief Destructor
42   ~DataHelper() = default;
43 
44   /// \brief Create an Album dataset while taking in a path to a image folder
45   ///     Creates the output directory if doesn't exist
46   /// \param[in] in_dir Image folder directory that takes in images
47   /// \param[in] out_dir Directory containing output json files
48   /// \return Status The status code returned
CreateAlbum(const std::string & in_dir,const std::string & out_dir)49   Status CreateAlbum(const std::string &in_dir, const std::string &out_dir) {
50     return CreateAlbumIF(StringToChar(in_dir), StringToChar(out_dir));
51   }
52 
53   /// \brief Update a json file field with a vector of string values
54   /// \param in_file The input file name to read in
55   /// \param key Key of field to write to
56   /// \param value Value array to write to file
57   /// \param out_file Optional input for output file path, will write to input file if not specified
58   /// \return Status The status code returned
59   Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<std::string> &value,
60                      const std::string &out_file = "") {
61     return UpdateArrayIF(StringToChar(in_file), StringToChar(key), VectorStringToChar(value), StringToChar(out_file));
62   }
63 
64   /// \brief Update a json file field with a vector of bool values
65   /// \param in_file The input file name to read in
66   /// \param key Key of field to write to
67   /// \param value Value array to write to file
68   /// \param out_file Optional parameter for output file path, will write to input file if not specified
69   /// \return Status The status code returned
70   Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<bool> &value,
71                      const std::string &out_file = "") {
72     return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
73   }
74 
75   /// \brief Update a json file field with a vector of int8 values
76   /// \param in_file The input file name to read in
77   /// \param key Key of field to write to
78   /// \param value Value array to write to file
79   /// \param out_file Optional parameter for output file path, will write to input file if not specified
80   /// \return Status The status code returned
81   Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<int8_t> &value,
82                      const std::string &out_file = "") {
83     return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
84   }
85 
86   /// \brief Update a json file field with a vector of uint8 values
87   /// \param in_file The input file name to read in
88   /// \param key Key of field to write to
89   /// \param value Value array to write to file
90   /// \param out_file Optional parameter for output file path, will write to input file if not specified
91   /// \return Status The status code returned
92   Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<uint8_t> &value,
93                      const std::string &out_file = "") {
94     return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
95   }
96 
97   /// \brief Update a json file field with a vector of int16 values
98   /// \param in_file The input file name to read in
99   /// \param key Key of field to write to
100   /// \param value Value array to write to file
101   /// \param out_file Optional parameter for output file path, will write to input file if not specified
102   /// \return Status The status code returned
103   Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<int16_t> &value,
104                      const std::string &out_file = "") {
105     return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
106   }
107 
108   /// \brief Update a json file field with a vector of uint16 values
109   /// \param in_file The input file name to read in
110   /// \param key Key of field to write to
111   /// \param value Value array to write to file
112   /// \param out_file Optional parameter for output file path, will write to input file if not specified
113   /// \return Status The status code returned
114   Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<uint16_t> &value,
115                      const std::string &out_file = "") {
116     return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
117   }
118 
119   /// \brief Update a json file field with a vector of int32 values
120   /// \param in_file The input file name to read in
121   /// \param key Key of field to write to
122   /// \param value Value array to write to file
123   /// \param out_file Optional parameter for output file path, will write to input file if not specified
124   /// \return Status The status code returned
125   Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<int32_t> &value,
126                      const std::string &out_file = "") {
127     return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
128   }
129 
130   /// \brief Update a json file field with a vector of uint32 values
131   /// \param in_file The input file name to read in
132   /// \param key Key of field to write to
133   /// \param value Value array to write to file
134   /// \param out_file Optional parameter for output file path, will write to input file if not specified
135   /// \return Status The status code returned
136   Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<uint32_t> &value,
137                      const std::string &out_file = "") {
138     return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
139   }
140 
141   /// \brief Update a json file field with a vector of int64 values
142   /// \param in_file The input file name to read in
143   /// \param key Key of field to write to
144   /// \param value Value array to write to file
145   /// \param out_file Optional parameter for output file path, will write to input file if not specified
146   /// \return Status The status code returned
147   Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<int64_t> &value,
148                      const std::string &out_file = "") {
149     return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
150   }
151 
152   /// \brief Update a json file field with a vector of uint64 values
153   /// \param in_file The input file name to read in
154   /// \param key Key of field to write to
155   /// \param value Value array to write to file
156   /// \param out_file Optional parameter for output file path, will write to input file if not specified
157   /// \return Status The status code returned
158   Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<uint64_t> &value,
159                      const std::string &out_file = "") {
160     return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
161   }
162 
163   /// \brief Update a json file field with a vector of float values
164   /// \param in_file The input file name to read in
165   /// \param key Key of field to write to
166   /// \param value Value array to write to file
167   /// \param out_file Optional parameter for output file path, will write to input file if not specified
168   /// \return Status The status code returned
169   Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<float> &value,
170                      const std::string &out_file = "") {
171     return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
172   }
173 
174   /// \brief Update a json file field with a vector of double values
175   /// \param in_file The input file name to read in
176   /// \param key Key of field to write to
177   /// \param value Value array to write to file
178   /// \param out_file Optional parameter for output file path, will write to input file if not specified
179   /// \return Status The status code returned
180   Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<double> &value,
181                      const std::string &out_file = "") {
182     return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
183   }
184 
185   /// \brief Update a json file field with a string value
186   /// \param in_file The input file name to read in
187   /// \param key Key of field to write to
188   /// \param value Value to write to file
189   /// \param out_file Optional parameter for output file path, will write to input file if not specified
190   /// \return Status The status code returned
191   Status UpdateValue(const std::string &in_file, const std::string &key, const std::string &value,
192                      const std::string &out_file = "") {
193     return UpdateValueIF(StringToChar(in_file), StringToChar(key), StringToChar(value), StringToChar(out_file));
194   }
195 
196   /// \brief Update a json file field with a bool value
197   /// \param in_file The input file name to read in
198   /// \param key Key of field to write to
199   /// \param value Value to write to file
200   /// \param out_file Optional parameter for output file path, will write to input file if not specified
201   /// \return Status The status code returned
202   Status UpdateValue(const std::string &in_file, const std::string &key, const bool &value,
203                      const std::string &out_file = "") {
204     return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
205   }
206 
207   /// \brief Update a json file field with an int8 value
208   /// \param in_file The input file name to read in
209   /// \param key Key of field to write to
210   /// \param value Value to write to file
211   /// \param out_file Optional parameter for output file path, will write to input file if not specified
212   /// \return Status The status code returned
213   Status UpdateValue(const std::string &in_file, const std::string &key, const int8_t &value,
214                      const std::string &out_file = "") {
215     return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
216   }
217 
218   /// \brief Update a json file field with an uint8 value
219   /// \param in_file The input file name to read in
220   /// \param key Key of field to write to
221   /// \param value Value to write to file
222   /// \param out_file Optional parameter for output file path, will write to input file if not specified
223   /// \return Status The status code returned
224   Status UpdateValue(const std::string &in_file, const std::string &key, const uint8_t &value,
225                      const std::string &out_file = "") {
226     return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
227   }
228 
229   /// \brief Update a json file field with an int16 value
230   /// \param in_file The input file name to read in
231   /// \param key Key of field to write to
232   /// \param value Value to write to file
233   /// \param out_file Optional parameter for output file path, will write to input file if not specified
234   /// \return Status The status code returned
235   Status UpdateValue(const std::string &in_file, const std::string &key, const int16_t &value,
236                      const std::string &out_file = "") {
237     return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
238   }
239 
240   /// \brief Update a json file field with an uint16 value
241   /// \param in_file The input file name to read in
242   /// \param key Key of field to write to
243   /// \param value Value to write to file
244   /// \param out_file Optional parameter for output file path, will write to input file if not specified
245   /// \return Status The status code returned
246   Status UpdateValue(const std::string &in_file, const std::string &key, const uint16_t &value,
247                      const std::string &out_file = "") {
248     return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
249   }
250 
251   /// \brief Update a json file field with an int32 value
252   /// \param in_file The input file name to read in
253   /// \param key Key of field to write to
254   /// \param value Value to write to file
255   /// \param out_file Optional parameter for output file path, will write to input file if not specified
256   /// \return Status The status code returned
257   Status UpdateValue(const std::string &in_file, const std::string &key, const int32_t &value,
258                      const std::string &out_file = "") {
259     return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
260   }
261 
262   /// \brief Update a json file field with an uint32 value
263   /// \param in_file The input file name to read in
264   /// \param key Key of field to write to
265   /// \param value Value to write to file
266   /// \param out_file Optional parameter for output file path, will write to input file if not specified
267   /// \return Status The status code returned
268   Status UpdateValue(const std::string &in_file, const std::string &key, const uint32_t &value,
269                      const std::string &out_file = "") {
270     return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
271   }
272 
273   /// \brief Update a json file field with an int64 value
274   /// \param in_file The input file name to read in
275   /// \param key Key of field to write to
276   /// \param value Value to write to file
277   /// \param out_file Optional parameter for output file path, will write to input file if not specified
278   /// \return Status The status code returned
279   Status UpdateValue(const std::string &in_file, const std::string &key, const int64_t &value,
280                      const std::string &out_file = "") {
281     return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
282   }
283 
284   /// \brief Update a json file field with an uint64 value
285   /// \param in_file The input file name to read in
286   /// \param key Key of field to write to
287   /// \param value Value to write to file
288   /// \param out_file Optional parameter for output file path, will write to input file if not specified
289   /// \return Status The status code returned
290   Status UpdateValue(const std::string &in_file, const std::string &key, const uint64_t &value,
291                      const std::string &out_file = "") {
292     return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
293   }
294 
295   /// \brief Update a json file field with a float value
296   /// \param in_file The input file name to read in
297   /// \param key Key of field to write to
298   /// \param value Value to write to file
299   /// \param out_file Optional parameter for output file path, will write to input file if not specified
300   /// \return Status The status code returned
301   Status UpdateValue(const std::string &in_file, const std::string &key, const float &value,
302                      const std::string &out_file = "") {
303     return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
304   }
305 
306   /// \brief Update a json file field with a double value
307   /// \param in_file The input file name to read in
308   /// \param key Key of field to write to
309   /// \param value Value to write to file
310   /// \param out_file Optional parameter for output file path, will write to input file if not specified
311   /// \return Status The status code returned
312   Status UpdateValue(const std::string &in_file, const std::string &key, const double &value,
313                      const std::string &out_file = "") {
314     return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
315   }
316 
317   /// \brief Template function to write tensor to file
318   /// \param[in] in_file File to write to
319   /// \param[in] data Array of type T values
320   /// \return Status The status code returned
321   template <typename T>
WriteBinFile(const std::string & in_file,const std::vector<T> & data)322   Status WriteBinFile(const std::string &in_file, const std::vector<T> &data) {
323     try {
324       std::ofstream o(in_file, std::ios::binary | std::ios::out);
325       if (!o.is_open()) {
326         return Status(kMDUnexpectedError, "Error opening Bin file to write");
327       }
328       size_t length = data.size();
329       if (length == 0) {
330         return Status(kMDUnexpectedError, "size of data is 0 when written into file.");
331       }
332       o.write(reinterpret_cast<const char *>(&data[0]), std::streamsize(length * sizeof(T)));
333       o.close();
334     }
335     // Catch any exception and convert to Status return code
336     catch (const std::exception &err) {
337       return Status(kMDUnexpectedError, "Write bin file failed ");
338     }
339     return Status::OK();
340   }
341 
342   /// \brief Write pointer to bin, use pointer to avoid memcpy
343   /// \note The value of `length`` must be equal to the length of `data`
344   /// \param[in] in_file File name to write to
345   /// \param[in] data Pointer to data
346   /// \param[in] length Length of values to write from pointer
347   /// \return Status The status code returned
348   template <typename T>
WriteBinFile(const std::string & in_file,T * data,size_t length)349   Status WriteBinFile(const std::string &in_file, T *data, size_t length) {
350     try {
351       if (data == nullptr) {
352         return Status(kMDUnexpectedError, "input data can not be null");
353       }
354       std::ofstream o(in_file, std::ios::binary | std::ios::out);
355       if (!o.is_open()) {
356         return Status(kMDUnexpectedError, "Error opening Bin file to write");
357       }
358       o.write(reinterpret_cast<const char *>(data), std::streamsize(length * sizeof(T)));
359       if (!o.good()) {
360         return Status(kMDUnexpectedError, "Error writing Bin file");
361       }
362       o.close();
363     }
364     // Catch any exception and convert to Status return code
365     catch (const std::exception &err) {
366       return Status(kMDUnexpectedError, "Write bin file failed");
367     }
368     return Status::OK();
369   }
370 
371   /// \brief Helper function to copy content of a tensor to buffer
372   /// \note This function iterates over the tensor in bytes, since
373   /// \param[in] tensor_addr The memory held by a tensor
374   /// \param[in] tensor_size The amount of data in bytes in tensor_addr, e.g. tensor->SizeInBytes()
375   /// \param[out] addr The address to copy tensor data to
376   /// \param[in] buffer_size The buffer size of addr
377   /// \return The size of the tensor (bytes copied
378   size_t DumpData(const unsigned char *tensor_addr, const size_t &tensor_size, void *addr, const size_t &buffer_size);
379 
380   /// \brief Helper function to delete key in json file
381   /// \note This function will return okay even if key not found
382   /// \param[in] in_file Json file to remove key from
383   /// \param[in] key The key to remove
384   /// \return Status The status code returned
385   Status RemoveKey(const std::string &in_file, const std::string &key, const std::string &out_file = "") {
386     return RemoveKeyIF(StringToChar(in_file), StringToChar(key), StringToChar(out_file));
387   }
388 
389   /// \brief A print method typically used for debugging
390   /// \param out - The output stream to write output to
391   void Print(std::ostream &out) const;
392 
393   /// \brief << Stream output operator overload
394   /// \note This allows you to write the debug print info using stream operators
395   /// \param out Reference to the output stream being overloaded
396   /// \param dh Reference to the DataSchema to display
397   /// \return The output stream must be returned
398   friend std::ostream &operator<<(std::ostream &out, const DataHelper &dh) {
399     dh.Print(out);
400     return out;
401   }
402 
403  private:
404   // Helper function for dual ABI support
405   Status CreateAlbumIF(const std::vector<char> &in_dir, const std::vector<char> &out_dir);
406   Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
407                        const std::vector<std::vector<char>> &value, const std::vector<char> &out_file);
408   Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key, const std::vector<bool> &value,
409                        const std::vector<char> &out_file);
410   Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key, const std::vector<int8_t> &value,
411                        const std::vector<char> &out_file);
412   Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
413                        const std::vector<uint8_t> &value, const std::vector<char> &out_file);
414   Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
415                        const std::vector<int16_t> &value, const std::vector<char> &out_file);
416   Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
417                        const std::vector<uint16_t> &value, const std::vector<char> &out_file);
418   Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
419                        const std::vector<int32_t> &value, const std::vector<char> &out_file);
420   Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
421                        const std::vector<uint32_t> &value, const std::vector<char> &out_file);
422   Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
423                        const std::vector<int64_t> &value, const std::vector<char> &out_file);
424   Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
425                        const std::vector<uint64_t> &value, const std::vector<char> &out_file);
426   Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key, const std::vector<float> &value,
427                        const std::vector<char> &out_file);
428   Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key, const std::vector<double> &value,
429                        const std::vector<char> &out_file);
430 
431   Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const std::vector<char> &value,
432                        const std::vector<char> &out_file);
433   Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const bool &value,
434                        const std::vector<char> &out_file);
435   Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const int8_t &value,
436                        const std::vector<char> &out_file);
437   Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const uint8_t &value,
438                        const std::vector<char> &out_file);
439   Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const int16_t &value,
440                        const std::vector<char> &out_file);
441   Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const uint16_t &value,
442                        const std::vector<char> &out_file);
443   Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const int32_t &value,
444                        const std::vector<char> &out_file);
445   Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const uint32_t &value,
446                        const std::vector<char> &out_file);
447   Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const int64_t &value,
448                        const std::vector<char> &out_file);
449   Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const uint64_t &value,
450                        const std::vector<char> &out_file);
451   Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const float &value,
452                        const std::vector<char> &out_file);
453   Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const double &value,
454                        const std::vector<char> &out_file);
455   Status RemoveKeyIF(const std::vector<char> &in_file, const std::vector<char> &key, const std::vector<char> &out_file);
456 };
457 }  // namespace dataset
458 }  // namespace mindspore
459 
460 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_DATA_HELPER_H_
461